#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
library(vip)
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)                                                  
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
## 
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
## 
##     nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
## 
##     compare
## The following object is masked from 'package:class':
## 
##     knn
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8    2023-06-11
#install.packages('ggplot2')
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## The following object is masked from 'package:kernlab':
## 
##     alpha
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()       masks igraph::%--%()
## ✖ ggplot2::alpha()        masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine()        masks randomForest::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ purrr::cross()          masks kernlab::cross()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ tidyr::expand()         masks Matrix::expand()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::margin()       masks randomForest::margin()
## ✖ purrr::none()           masks locfit::none()
## ✖ tidyr::pack()           masks Matrix::pack()
## ✖ purrr::simplify()       masks igraph::simplify()
## ✖ tidyr::unpack()         masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
## 
## The following objects are masked from 'package:rstanarm':
## 
##     compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
## 
## Attaching package: 'TDA'
## 
## The following object is masked from 'package:cluster':
## 
##     silhouette
library(TDAstats)
library(ks)
## 
## Attaching package: 'ks'
## 
## The following object is masked from 'package:TDA':
## 
##     kde
## 
## The following object is masked from 'package:MCMCpack':
## 
##     vech
## 
## The following object is masked from 'package:igraph':
## 
##     compare
## 
## The following object is masked from 'package:BayesFactor':
## 
##     compare
#install.packages('MLmetrics')
library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## 
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## 
## The following object is masked from 'package:base':
## 
##     Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Import the UCI Adult dataset from a local copy stored on the desktop.

# Adult **
# The file is read without a header row, so the columns are named V1..V15.
adult <- read.csv("~/Desktop/NCU/DissertationDatasets/Adult/adult.data", header = FALSE)
# str() prints the structure itself and returns NULL; wrapping it in head()
# only added a stray "NULL" to the output.
str(adult)
## 'data.frame':    32561 obs. of  15 variables:
##  $ V1 : int  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2 : chr  " State-gov" " Self-emp-not-inc" " Private" " Private" ...
##  $ V3 : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
##  $ V4 : chr  " Bachelors" " Bachelors" " HS-grad" " 11th" ...
##  $ V5 : int  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6 : chr  " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
##  $ V7 : chr  " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
##  $ V8 : chr  " Not-in-family" " Husband" " Not-in-family" " Husband" ...
##  $ V9 : chr  " White" " White" " White" " Black" ...
##  $ V10: chr  " Male" " Male" " Male" " Male" ...
##  $ V11: int  2174 0 0 0 0 0 0 0 14084 5178 ...
##  $ V12: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13: int  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14: chr  " United-States" " United-States" " United-States" " United-States" ...
##  $ V15: chr  " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
  # Per-column summary of the raw adult data frame.
  summary(adult)
##        V1             V2                  V3               V4           
##  Min.   :17.00   Length:32561       Min.   :  12285   Length:32561      
##  1st Qu.:28.00   Class :character   1st Qu.: 117827   Class :character  
##  Median :37.00   Mode  :character   Median : 178356   Mode  :character  
##  Mean   :38.58                      Mean   : 189778                     
##  3rd Qu.:48.00                      3rd Qu.: 237051                     
##  Max.   :90.00                      Max.   :1484705                     
##        V5             V6                 V7                 V8           
##  Min.   : 1.00   Length:32561       Length:32561       Length:32561      
##  1st Qu.: 9.00   Class :character   Class :character   Class :character  
##  Median :10.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :10.08                                                           
##  3rd Qu.:12.00                                                           
##  Max.   :16.00                                                           
##       V9                V10                 V11             V12        
##  Length:32561       Length:32561       Min.   :    0   Min.   :   0.0  
##  Class :character   Class :character   1st Qu.:    0   1st Qu.:   0.0  
##  Mode  :character   Mode  :character   Median :    0   Median :   0.0  
##                                        Mean   : 1078   Mean   :  87.3  
##                                        3rd Qu.:    0   3rd Qu.:   0.0  
##                                        Max.   :99999   Max.   :4356.0  
##       V13            V14                V15           
##  Min.   : 1.00   Length:32561       Length:32561      
##  1st Qu.:40.00   Class :character   Class :character  
##  Median :40.00   Mode  :character   Mode  :character  
##  Mean   :40.44                                        
##  3rd Qu.:45.00                                        
##  Max.   :99.00
  # Pairs plot of columns V1, V3, V5, V11, V12 and V13 together with V15.
  ggpairs(adult[, c(1L, 3L, 5L, 11:13, 15L)])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##Add Bayesian tests functions

# Bayesian sign test (simplified, count-based version).
#
# Arguments:
#   diffVector : numeric vector of differences between two classifiers.
#   rope_min   : lower bound of the rope.
#   rope_max   : upper bound of the rope.
#
# Returns a list with probLeft, probRope and probRight: the share of
# observations (including one pseudo-observation at 0) that lie below, inside
# and above the rope. The rope is the closed interval [rope_min, rope_max],
# so the three shares always add up to 1.
#
# Stops with an error when rope_max < rope_min.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # Add the fake first observation in 0. In this version it is counted like
  # any other observation; no Dirichlet weights are sampled.
  diffVector <- c(0, diffVector)

  probLeft <- mean(diffVector < rope_min)
  probRight <- mean(diffVector > rope_max)

  # Values exactly on a rope bound belong to the rope. With strict
  # inequalities on both sides they were counted in no category at all.
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)

  list("probLeft" = probLeft, "probRope" = probRope, "probRight" = probRight)
}


## Bayesian signed-rank test.
#
# Arguments:
#   diffVector : numeric vector of differences between two classifiers.
#   rope_min   : lower bound of the rope.
#   rope_max   : upper bound of the rope.
#
# For each of 30000 Dirichlet weight samples, the weight products w_i * w_j of
# all observation pairs (i, j) are accumulated into one of three regions,
# depending on diffVector[i] + diffVector[j]:
#   right : sum >  2 * rope_max
#   rope  : sum >  2 * rope_min (and not right)
#   left  : everything else
# The region with the largest accumulated mass wins the sample (ties are
# split evenly).
#
# Returns a list with winLeft, winRope and winRight: the fraction of samples
# won by each region.
#
# Stops with an error when rope_max < rope_min or diffVector contains NA.
BayesianSignedRank <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  if (anyNA(diffVector)) {
    stop("diffVector must not contain missing values")
  }

  samples <- 30000

  # Build the vector 0.5 1 1 ....... 1
  weights <- c(0.5, rep(1, length(diffVector)))

  # Add the fake first observation in 0
  diffVector <- c(0, diffVector)

  # One row of observation weights per sample.
  sampledWeights <- MCMCpack::rdirichlet(samples, weights)

  # The region of each pair depends only on the data, not on the sampled
  # weights, so it is classified once instead of once per sample.
  pairSums <- outer(diffVector, diffVector, "+")
  inRight <- pairSums > 2 * rope_max
  inRope <- !inRight & pairSums > 2 * rope_min
  inLeft <- !(inRight | inRope)

  # For every sample (row) compute sum over i, j of w_i * w_j * mask[i, j].
  regionMass <- function(mask) {
    rowSums((sampledWeights %*% (mask * 1)) * sampledWeights)
  }
  massRight <- regionMass(inRight)
  massRope <- regionMass(inRope)
  massLeft <- regionMass(inLeft)

  # A region wins a sample when it holds the largest mass; a tie is shared
  # equally between the tied regions.
  topMass <- pmax(massRight, massRope, massLeft)
  rightWins <- massRight == topMass
  ropeWins <- massRope == topMass
  leftWins <- massLeft == topMass
  winners <- rightWins + ropeWins + leftWins

  list(
    "winLeft" = mean(leftWins / winners),
    "winRope" = mean(ropeWins / winners),
    "winRight" = mean(rightWins / winners)
  )
}


# Bayesian correlated t-test.
#
# Arguments:
#   diff_a_b : vector of differences between the two classifiers, on each fold
#              of cross-validation. If you have done 10 runs of 10-fold
#              cross-validation, you have 100 results for each classifier.
#              Both classifiers should have been run on the same folds, so
#              that diff_a_b is the fold-by-fold difference.
#   rho      : correlation of the cross-validation results:
#              1 / (number of folds). Must lie in [0, 1).
#   rope_min : lower bound of the rope.
#   rope_max : upper bound of the rope.
#
# Returns a list with left, rope and right: the probability mass below,
# inside and above the rope.
#
# Stops with an error when rope_max < rope_min or rho is outside [0, 1).
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  # rho == 1 divides by zero below and rho > 1 takes the square root of a
  # negative number; both used to yield meaningless output silently.
  if (!is.na(rho) && (rho < 0 || rho >= 1)) {
    stop("rho should be in [0, 1)")
  }

  delta <- mean(diff_a_b)
  n <- length(diff_a_b)
  df <- n - 1
  sp <- sd(diff_a_b) * sqrt(1 / n + rho / (1 - rho))

  if (isTRUE(sp == 0)) {
    # No spread at all: treat the result as a point mass at delta. Dividing
    # by sp would give 0 / 0 = NaN when delta sits exactly on a rope bound.
    p.left <- as.numeric(delta < rope_min)
    p.right <- as.numeric(delta > rope_max)
    p.rope <- 1 - p.left - p.right
  } else {
    upper <- (rope_max - delta) / sp
    p.left <- pt((rope_min - delta) / sp, df)
    p.rope <- pt(upper, df) - p.left
    # Upper tail taken directly instead of 1 - left - rope, which loses
    # precision when the right mass is tiny.
    p.right <- pt(upper, df, lower.tail = FALSE)
  }

  list("left" = p.left, "rope" = p.rope, "right" = p.right)
}
set.seed(16974)
### Prepare datasets: one-hot encode where necessary, then compute the persistent homology of each dataset.
## One-hot encoding for adult dataset
library(caret)

# Build the one-hot encoder from every column of adult.
dummy.adult <- dummyVars(" ~ .", data = adult)

# Perform one-hot encoding on the data frame.
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata = adult))


# Show the structure of the encoded data frame. str() prints by itself and
# returns NULL, so wrapping it in head() only printed an extra "NULL".
str(adult.one_hot_df)
## 'data.frame':    32561 obs. of  110 variables:
##  $ V1                            : num  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  0 0 1 1 1 1 1 0 1 1 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 1 0 0 0 0 0 1 0 0 ...
##  $ V2.State.gov                  : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  77516 83311 215646 234721 338409 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  1 1 0 0 1 0 0 0 0 1 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ V4.Masters                    : num  0 0 0 0 0 1 0 0 1 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V5                            : num  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6.Divorced                   : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  0 1 0 1 1 1 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V6.Never.married              : num  1 0 0 0 0 0 0 0 1 0 ...
##  $ V6.Separated                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Adm.clerical               : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 1 0 0 0 1 0 1 0 1 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  0 1 0 1 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  1 0 1 0 0 0 1 0 1 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Unmarried                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 1 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 1 1 0 1 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 0 0 1 0 1 1 1 ...
##  $ V10.Female                    : num  0 0 0 0 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 1 1 1 0 0 0 1 0 1 ...
##  $ V11                           : num  2174 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# Column 15 of the raw data (V15), kept alongside the encoded features.
adult_df1 <- adult[[15]]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Positions 1, 11, 28 and 64-66 of the encoded frame are V1, V3, V5, V11, V12
# and V13. The appended adult_df1 column is not part of this selection.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64:66)]

## Persistent homology of adult dataset

# Draw a random sample of 1000 rows (without replacement) from the one-hot
# encoded data, to see whether a barcode and persistence diagram can be
# resolved at that size.

adult.one_hot_1000_df <- adult.one_hot_df[sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE), ]
# str() prints the structure itself and returns NULL; head() around it only
# printed an extra "NULL".
str(adult.one_hot_1000_df)
## 'data.frame':    1000 obs. of  110 variables:
##  $ V1                            : num  33 25 39 21 32 26 20 58 24 63 ...
##  $ V2..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  1 1 1 1 1 1 0 0 1 0 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ V2.State.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  176992 105693 234901 198050 134886 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  0 1 0 0 0 1 0 0 0 0 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V4.Masters                    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 1 0 0 1 ...
##  $ V5                            : num  14 13 12 12 9 13 10 7 9 10 ...
##  $ V6.Divorced                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  1 0 0 0 1 0 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Never.married              : num  0 1 0 1 0 1 1 0 1 0 ...
##  $ V6.Separated                  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V7.Adm.clerical               : num  0 0 1 1 1 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 0 0 1 1 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  1 1 0 0 0 1 0 0 0 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  1 0 0 0 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ V8.Unmarried                  : num  0 0 1 0 0 0 1 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V10.Female                    : num  0 1 0 1 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 0 1 0 0 0 0 1 0 1 ...
##  $ V11                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 40 40 25 40 40 20 16 25 48 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# Per-column summary of the 1000-row sample of the encoded data.
summary(adult.one_hot_1000_df)
##        V1             V2..       V2.Federal.gov   V2.Local.gov  
##  Min.   :17.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:28.00   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :37.00   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :38.64   Mean   :0.077   Mean   :0.025   Mean   :0.064  
##  3rd Qu.:47.00   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :90.00   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V2.Never.worked   V2.Private    V2.Self.emp.inc V2.Self.emp.not.inc
##  Min.   :0       Min.   :0.000   Min.   :0.000   Min.   :0.000      
##  1st Qu.:0       1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000      
##  Median :0       Median :1.000   Median :0.000   Median :0.000      
##  Mean   :0       Mean   :0.679   Mean   :0.037   Mean   :0.079      
##  3rd Qu.:0       3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.000      
##  Max.   :0       Max.   :1.000   Max.   :1.000   Max.   :1.000      
##   V2.State.gov   V2.Without.pay       V3            V4.10th     
##  Min.   :0.000   Min.   :0      Min.   : 19302   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0      1st Qu.:123797   1st Qu.:0.000  
##  Median :0.000   Median :0      Median :181982   Median :0.000  
##  Mean   :0.039   Mean   :0      Mean   :195583   Mean   :0.041  
##  3rd Qu.:0.000   3rd Qu.:0      3rd Qu.:242529   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :0      Max.   :721161   Max.   :1.000  
##     V4.11th         V4.12th        V4.1st.4th      V4.5th.6th   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.032   Mean   :0.015   Mean   :0.005   Mean   :0.015  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##    V4.7th.8th        V4.9th      V4.Assoc.acdm   V4.Assoc.voc    V4.Bachelors  
##  Min.   :0.000   Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.00   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.00   Median :0.000   Median :0.000  
##  Mean   :0.015   Mean   :0.018   Mean   :0.04   Mean   :0.052   Mean   :0.155  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.00   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.00   Max.   :1.000   Max.   :1.000  
##   V4.Doctorate     V4.HS.grad      V4.Masters     V4.Preschool  
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.014   Mean   :0.327   Mean   :0.053   Mean   :0.002  
##  3rd Qu.:0.000   3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V4.Prof.school  V4.Some.college       V5      V6.Divorced   
##  Min.   :0.000   Min.   :0.000   Min.   : 1   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.: 9   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :10   Median :0.000  
##  Mean   :0.014   Mean   :0.202   Mean   :10   Mean   :0.132  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:12   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :16   Max.   :1.000  
##  V6.Married.AF.spouse V6.Married.civ.spouse V6.Married.spouse.absent
##  Min.   :0            Min.   :0.000         Min.   :0.000           
##  1st Qu.:0            1st Qu.:0.000         1st Qu.:0.000           
##  Median :0            Median :0.000         Median :0.000           
##  Mean   :0            Mean   :0.464         Mean   :0.005           
##  3rd Qu.:0            3rd Qu.:1.000         3rd Qu.:0.000           
##  Max.   :0            Max.   :1.000         Max.   :1.000           
##  V6.Never.married  V6.Separated     V6.Widowed         V7..      
##  Min.   :0.000    Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000    1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000    Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.325    Mean   :0.041   Mean   :0.033   Mean   :0.077  
##  3rd Qu.:1.000    3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000    Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V7.Adm.clerical V7.Armed.Forces V7.Craft.repair V7.Exec.managerial
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000     
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000     
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000     
##  Mean   :0.117   Mean   :0.001   Mean   :0.129   Mean   :0.124     
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000     
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000     
##  V7.Farming.fishing V7.Handlers.cleaners V7.Machine.op.inspct V7.Other.service
##  Min.   :0.000      Min.   :0.000        Min.   :0.000        Min.   :0.000   
##  1st Qu.:0.000      1st Qu.:0.000        1st Qu.:0.000        1st Qu.:0.000   
##  Median :0.000      Median :0.000        Median :0.000        Median :0.000   
##  Mean   :0.029      Mean   :0.041        Mean   :0.071        Mean   :0.091   
##  3rd Qu.:0.000      3rd Qu.:0.000        3rd Qu.:0.000        3rd Qu.:0.000   
##  Max.   :1.000      Max.   :1.000        Max.   :1.000        Max.   :1.000   
##  V7.Priv.house.serv V7.Prof.specialty V7.Protective.serv    V7.Sales    
##  Min.   :0.000      Min.   :0.000     Min.   :0.000      Min.   :0.000  
##  1st Qu.:0.000      1st Qu.:0.000     1st Qu.:0.000      1st Qu.:0.000  
##  Median :0.000      Median :0.000     Median :0.000      Median :0.000  
##  Mean   :0.003      Mean   :0.119     Mean   :0.018      Mean   :0.102  
##  3rd Qu.:0.000      3rd Qu.:0.000     3rd Qu.:0.000      3rd Qu.:0.000  
##  Max.   :1.000      Max.   :1.000     Max.   :1.000      Max.   :1.000  
##  V7.Tech.support V7.Transport.moving   V8.Husband   V8.Not.in.family
##  Min.   :0.000   Min.   :0.000       Min.   :0.00   Min.   :0.000   
##  1st Qu.:0.000   1st Qu.:0.000       1st Qu.:0.00   1st Qu.:0.000   
##  Median :0.000   Median :0.000       Median :0.00   Median :0.000   
##  Mean   :0.035   Mean   :0.043       Mean   :0.41   Mean   :0.261   
##  3rd Qu.:0.000   3rd Qu.:0.000       3rd Qu.:1.00   3rd Qu.:1.000   
##  Max.   :1.000   Max.   :1.000       Max.   :1.00   Max.   :1.000   
##  V8.Other.relative  V8.Own.child    V8.Unmarried      V8.Wife     
##  Min.   :0.000     Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000     1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000     Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.027     Mean   :0.136   Mean   :0.115   Mean   :0.051  
##  3rd Qu.:0.000     3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000     Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V9.Amer.Indian.Eskimo V9.Asian.Pac.Islander    V9.Black        V9.Other    
##  Min.   :0.000         Min.   :0.000         Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000         1st Qu.:0.000         1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000         Median :0.000         Median :0.000   Median :0.000  
##  Mean   :0.014         Mean   :0.029         Mean   :0.104   Mean   :0.007  
##  3rd Qu.:0.000         3rd Qu.:0.000         3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000         Max.   :1.000         Max.   :1.000   Max.   :1.000  
##     V9.White       V10.Female       V10.Male          V11         
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0.0  
##  1st Qu.:1.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:    0.0  
##  Median :1.000   Median :0.000   Median :1.000   Median :    0.0  
##  Mean   :0.846   Mean   :0.339   Mean   :0.661   Mean   :  868.9  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:    0.0  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :99999.0  
##       V12               V13           V14..        V14.Cambodia   V14.Canada   
##  Min.   :   0.00   Min.   : 1.0   Min.   :0.000   Min.   :0     Min.   :0.000  
##  1st Qu.:   0.00   1st Qu.:40.0   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000  
##  Median :   0.00   Median :40.0   Median :0.000   Median :0     Median :0.000  
##  Mean   :  92.56   Mean   :40.5   Mean   :0.024   Mean   :0     Mean   :0.003  
##  3rd Qu.:   0.00   3rd Qu.:45.0   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000  
##  Max.   :2457.00   Max.   :99.0   Max.   :1.000   Max.   :0     Max.   :1.000  
##    V14.China      V14.Columbia      V14.Cuba     V14.Dominican.Republic
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000         
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000         
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000         
##  Mean   :0.003   Mean   :0.002   Mean   :0.005   Mean   :0.002         
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000         
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000         
##   V14.Ecuador    V14.El.Salvador  V14.England      V14.France   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.001   Mean   :0.003   Mean   :0.003   Mean   :0.001  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##   V14.Germany      V14.Greece    V14.Guatemala   V14.Haiti    
##  Min.   :0.000   Min.   :0.000   Min.   :0     Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0     Median :0.000  
##  Mean   :0.002   Mean   :0.002   Mean   :0     Mean   :0.002  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :0     Max.   :1.000  
##  V14.Holand.Netherlands  V14.Honduras    V14.Hong      V14.Hungary   
##  Min.   :0              Min.   :0     Min.   :0.000   Min.   :0.000  
##  1st Qu.:0              1st Qu.:0     1st Qu.:0.000   1st Qu.:0.000  
##  Median :0              Median :0     Median :0.000   Median :0.000  
##  Mean   :0              Mean   :0     Mean   :0.001   Mean   :0.002  
##  3rd Qu.:0              3rd Qu.:0     3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :0              Max.   :0     Max.   :1.000   Max.   :1.000  
##    V14.India        V14.Iran  V14.Ireland   V14.Italy      V14.Jamaica   
##  Min.   :0.000   Min.   :0   Min.   :0    Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0   1st Qu.:0    1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0   Median :0    Median :0.000   Median :0.000  
##  Mean   :0.004   Mean   :0   Mean   :0    Mean   :0.003   Mean   :0.003  
##  3rd Qu.:0.000   3rd Qu.:0   3rd Qu.:0    3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :0   Max.   :0    Max.   :1.000   Max.   :1.000  
##    V14.Japan        V14.Laos   V14.Mexico    V14.Nicaragua
##  Min.   :0.000   Min.   :0   Min.   :0.000   Min.   :0    
##  1st Qu.:0.000   1st Qu.:0   1st Qu.:0.000   1st Qu.:0    
##  Median :0.000   Median :0   Median :0.000   Median :0    
##  Mean   :0.003   Mean   :0   Mean   :0.022   Mean   :0    
##  3rd Qu.:0.000   3rd Qu.:0   3rd Qu.:0.000   3rd Qu.:0    
##  Max.   :1.000   Max.   :0   Max.   :1.000   Max.   :0    
##  V14.Outlying.US.Guam.USVI.etc.    V14.Peru V14.Philippines   V14.Poland   
##  Min.   :0                      Min.   :0   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0                      1st Qu.:0   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0                      Median :0   Median :0.000   Median :0.000  
##  Mean   :0                      Mean   :0   Mean   :0.004   Mean   :0.002  
##  3rd Qu.:0                      3rd Qu.:0   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :0                      Max.   :0   Max.   :1.000   Max.   :1.000  
##   V14.Portugal   V14.Puerto.Rico  V14.Scotland   V14.South       V14.Taiwan   
##  Min.   :0.000   Min.   :0.000   Min.   :0     Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0     Median :0.000   Median :0.000  
##  Mean   :0.001   Mean   :0.004   Mean   :0     Mean   :0.001   Mean   :0.001  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :0     Max.   :1.000   Max.   :1.000  
##   V14.Thailand V14.Trinadad.Tobago V14.United.States  V14.Vietnam   
##  Min.   :0     Min.   :0.000       Min.   :0.000     Min.   :0.000  
##  1st Qu.:0     1st Qu.:0.000       1st Qu.:1.000     1st Qu.:0.000  
##  Median :0     Median :0.000       Median :1.000     Median :0.000  
##  Mean   :0     Mean   :0.002       Mean   :0.891     Mean   :0.003  
##  3rd Qu.:0     3rd Qu.:0.000       3rd Qu.:1.000     3rd Qu.:0.000  
##  Max.   :0     Max.   :1.000       Max.   :1.000     Max.   :1.000  
##  V14.Yugoslavia   V15...50K        V15..50K    
##  Min.   :0      Min.   :0.000   Min.   :0.000  
##  1st Qu.:0      1st Qu.:1.000   1st Qu.:0.000  
##  Median :0      Median :1.000   Median :0.000  
##  Mean   :0      Mean   :0.769   Mean   :0.231  
##  3rd Qu.:0      3rd Qu.:1.000   3rd Qu.:0.000  
##  Max.   :0      Max.   :1.000   Max.   :1.000
# Persistent homology of the sample held in adult.one_hot_1000_df
# (calculate_homology() is called with its default settings).
phom.adult.one_hot_1000_df <- calculate_homology(mat = adult.one_hot_1000_df)

# Barcode view of the computed features
plot_barcode(feature.matrix = phom.adult.one_hot_1000_df)

# Persistence-diagram view of the same features
plot_persist(feature.matrix = phom.adult.one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————

# Build the derived data frames used by the Mapper runs and the models below.

# Column 15 of the raw Adult data; it serves as the class label later on.
adult_df1 <- adult[, 15]

# One-hot data with that column appended under the name `adult_df1`.
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)

# Hand-picked column subsets (positions refer to adult.one_hot_df1).
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]

# Everything except columns 109 and 110
# (presumably the one-hot V15 income indicators -- TODO confirm).
adult.one_hot_df4 <- adult.one_hot_df1[, -(109:110)]

## Two filter functions: PCA and KDE

# Linear PCA filter: centre and scale every column of the one-hot data, then
# project the same rows onto the principal components.
# `scale.` is spelled out in full; the previous `scale = TRUE` only worked
# through partial argument matching.
# NOTE(review): adult.one_hot_df appears to still contain the one-hot income
# columns (V15...), so this filter may encode the outcome -- confirm intended.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))

# Kernel density filter: density estimated from the first four columns of
# adult.one_hot_df3 with a 4 x 4 identity bandwidth matrix, evaluated at the
# data points themselves so that every row gets one filter value.
filter.kde <- kde(adult.one_hot_df3[, 1:4],
                  H = diag(1, nrow = 4),
                  eval.points = adult.one_hot_df3[, 1:4])$estimate




##*** Adult Mapper (PCA filter): 5 intervals, 40% overlap, 5 bins

# Mapper on the pairwise distances of the one-hot data, filtered by the
# first principal component.
m_adult_5.40.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = ts_pca_b$PC1,
     num_intervals = 5,
     percent_overlap = 40,
     num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# reports as deprecated since 2.0.0.
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5))

# Inspect the Mapper output. str() prints its summary and returns NULL, so it
# is called directly; wrapping it in head() only printed an extra "NULL".
str(m_adult_5.40.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_adult_5.40.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_adult_5.40.5$points_in_vertex)
## List of 5
##  $ : int [1:3373] 8 10 12 21 26 46 64 69 73 95 ...
##  $ : int [1:10276] 2 8 10 11 12 15 21 24 26 28 ...
##  $ : int [1:11563] 1 2 4 6 9 16 19 20 23 24 ...
##  $ : int [1:14818] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:12081] 7 13 14 18 22 25 27 32 36 37 ...
# Draw the PCA Mapper graph with vertices coloured by filter level and sized
# by the number of points they contain.

# Colour ramp: levels are rescaled to (0, 1] and binned into `my_resolution`
# palette slots.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.40.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]

# graph_from_adjacency_matrix() / layout_nicely() are the current igraph names
# for the deprecated graph.adjacency() / layout.auto().
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")

# Number of points per vertex
vertex_size <- lengths(m_adult_5.40.5$points_in_vertex)

# Vertex size is proportional to log(point count), with the largest vertex
# drawn at size 30.
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Pull the row IDs assigned to each of the five PCA-Mapper vertices.
## `.nK` keeps the one-element list, `.nK.vec` is the plain ID vector.
m_adult_5.40.5.n1 <- m_adult_5.40.5$points_in_vertex[1]
m_adult_5.40.5.n1.vec <- unlist(m_adult_5.40.5.n1, use.names = FALSE)

m_adult_5.40.5.n2 <- m_adult_5.40.5$points_in_vertex[2]
m_adult_5.40.5.n2.vec <- unlist(m_adult_5.40.5.n2, use.names = FALSE)

m_adult_5.40.5.n3 <- m_adult_5.40.5$points_in_vertex[3]
m_adult_5.40.5.n3.vec <- unlist(m_adult_5.40.5.n3, use.names = FALSE)

m_adult_5.40.5.n4 <- m_adult_5.40.5$points_in_vertex[4]
m_adult_5.40.5.n4.vec <- unlist(m_adult_5.40.5.n4, use.names = FALSE)

m_adult_5.40.5.n5 <- m_adult_5.40.5$points_in_vertex[5]
m_adult_5.40.5.n5.vec <- unlist(m_adult_5.40.5.n5, use.names = FALSE)

## Use each vertex's IDs as row indices into adult.one_hot_df4 to obtain the
## observations that belong to that vertex.
tda.m_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_adult_5.40.5.n1.vec, ]
tda.m_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_adult_5.40.5.n2.vec, ]
tda.m_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_adult_5.40.5.n3.vec, ]
tda.m_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_adult_5.40.5.n4.vec, ]
tda.m_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_adult_5.40.5.n5.vec, ]


##*** Adult Mapper (KDE filter): 5 intervals, 40% overlap, 5 bins

# Mapper on the pairwise distances of the one-hot data, filtered by the
# kernel density estimate stored in filter.kde.
m_kde_adult_5.40.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = filter.kde,
     num_intervals = 5,
     percent_overlap = 40,
     num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace the deprecated graph.adjacency() and layout.auto().
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5))

# Inspect the KDE Mapper output. str() prints its summary and returns NULL, so
# it is called directly; wrapping it in head() only printed an extra "NULL".
str(m_kde_adult_5.40.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_kde_adult_5.40.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_kde_adult_5.40.5$points_in_vertex)
## List of 5
##  $ : int [1:11838] 4 5 6 7 9 16 19 20 21 22 ...
##  $ : int [1:11203] 1 2 6 9 13 20 24 25 26 29 ...
##  $ : int [1:10351] 1 8 10 11 12 14 27 28 30 31 ...
##  $ : int [1:8741] 3 10 11 12 14 15 27 30 32 34 ...
##  $ : int [1:6628] 3 15 17 18 37 39 59 60 65 66 ...
# Draw the KDE Mapper graph with vertices coloured by filter level and sized
# by the number of points they contain.

# Colour ramp: levels are rescaled to (0, 1] and binned into `my_resolution`
# palette slots.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_kde_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.40.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]

# graph_from_adjacency_matrix() / layout_nicely() are the current igraph names
# for the deprecated graph.adjacency() / layout.auto().
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")

# Number of points per vertex
vertex_size <- lengths(m_kde_adult_5.40.5$points_in_vertex)

# Vertex size is proportional to log(point count), with the largest vertex
# drawn at size 30.
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Pull the row IDs assigned to each of the five KDE-Mapper vertices.
## `.nK` keeps the one-element list, `.nK.vec` is the plain ID vector.
m_kde_adult_5.40.5.n1 <- m_kde_adult_5.40.5$points_in_vertex[1]
m_kde_adult_5.40.5.n1.vec <- unlist(m_kde_adult_5.40.5.n1, use.names = FALSE)

m_kde_adult_5.40.5.n2 <- m_kde_adult_5.40.5$points_in_vertex[2]
m_kde_adult_5.40.5.n2.vec <- unlist(m_kde_adult_5.40.5.n2, use.names = FALSE)

m_kde_adult_5.40.5.n3 <- m_kde_adult_5.40.5$points_in_vertex[3]
m_kde_adult_5.40.5.n3.vec <- unlist(m_kde_adult_5.40.5.n3, use.names = FALSE)

m_kde_adult_5.40.5.n4 <- m_kde_adult_5.40.5$points_in_vertex[4]
m_kde_adult_5.40.5.n4.vec <- unlist(m_kde_adult_5.40.5.n4, use.names = FALSE)

m_kde_adult_5.40.5.n5 <- m_kde_adult_5.40.5$points_in_vertex[5]
m_kde_adult_5.40.5.n5.vec <- unlist(m_kde_adult_5.40.5.n5, use.names = FALSE)

## Use each vertex's IDs as row indices into adult.one_hot_df4 to obtain the
## observations that belong to that vertex.
tda.m_kde_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n1.vec, ]
tda.m_kde_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n2.vec, ]
tda.m_kde_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n3.vec, ]
tda.m_kde_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n4.vec, ]
tda.m_kde_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n5.vec, ]
library(caret)

# Rebuild the derived data frames ahead of the train/test split
# (same statements as the preparation step before the Mapper runs).

# Column 15 of the raw Adult data; used as the class label by the models.
adult_df1 <- adult[, 15]

# One-hot data with that column appended under the name `adult_df1`.
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)

# Hand-picked column subsets (positions refer to adult.one_hot_df1).
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]

# Modelling frame: everything except columns 109 and 110
# (presumably the one-hot V15 income indicators -- TODO confirm).
adult.one_hot_df4 <- adult.one_hot_df1[, -(109:110)]

# Draw a single partition on the class label with 70% of the rows assigned to
# training; with list = FALSE the row indices come back as a one-column matrix.
trainIndex <- createDataPartition(
  y = adult.one_hot_df4$adult_df1,
  times = 1,
  p = 0.7,
  list = FALSE
)

# Peek at the first few selected row indices
head(trainIndex)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         4
## [4,]         5
## [5,]         7
## [6,]         8
# Rows picked by trainIndex form the training set; all other rows are held out.
adult.one_hot_df4Test  <- adult.one_hot_df4[-trainIndex, ]
adult.one_hot_df4Train <- adult.one_hot_df4[trainIndex, ]

# Resampling scheme passed to the caret models: 3-fold cross-validation.
fitControl <- trainControl(method = "cv", number = 3)
# Non-TDA-assisted baseline

# Candidate mtry values. randomForest resets any mtry above the number of
# predictors back into the valid range (with a warning), so the original
# candidates (50, 100, ..., 1000) are capped at the predictor count and
# de-duplicated. This keeps the distinct models that were effectively being
# compared while avoiding repeated fits of the same forest.
n_predictors <- ncol(adult.one_hot_df4Train) - 1  # all columns but adult_df1
rfGrid <- expand.grid(mtry = unique(pmin((1:20) * 50, n_predictors)))

# Random forest tuned by cross-validated accuracy.
# `importance = TRUE` is forwarded to randomForest(); the previous spelling
# `Importance = T` did not match that argument and was silently ignored.
adultRfFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train,
                    importance = TRUE,
                    method = 'rf',
                    trControl = fitControl,
                    tuneGrid = rfGrid, preProc = c('center', 'scale'),
                    metric = 'Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the caret summary of the tuned random forest
# (resampled accuracy and kappa per mtry, plus the selected value).
adultRfFit
## Random Forest 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8572372  0.5882934
##    100  0.8546924  0.5828444
##    150  0.8549557  0.5839639
##    200  0.8541660  0.5813642
##    250  0.8546925  0.5827775
##    300  0.8542976  0.5810137
##    350  0.8535957  0.5792854
##    400  0.8531569  0.5788817
##    450  0.8530691  0.5783394
##    500  0.8532884  0.5784096
##    550  0.8538589  0.5809612
##    600  0.8533324  0.5787809
##    650  0.8542099  0.5817356
##    700  0.8543415  0.5823679
##    750  0.8538588  0.5805136
##    800  0.8528498  0.5773478
##    850  0.8537712  0.5804901
##    900  0.8536833  0.5798539
##    950  0.8535079  0.5788895
##   1000  0.8543854  0.5818180
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa for the selected tuning value
adultRfFit$resample
##    Accuracy     Kappa Resample
## 1 0.8636304 0.6053166    Fold1
## 2 0.8575941 0.5924862    Fold3
## 3 0.8504870 0.5670776    Fold2
# Keep the first column (Accuracy) as a one-column data frame
ad_rf_fit_re <- adultRfFit$resample[1]


# Component listing of the fitted object
summary(adultRfFit)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       22793  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           45586  matrix     numeric  
## oob.times       22793  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               22793  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot limited to 25 features
vip(adultRfFit, num_features = 25) + ggtitle("non-TDA-Assisted: RF")

# Class predictions of the tuned model for the held-out rows
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)

# Compare the predictions with the held-out labels
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
rf_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6929   891
##      >50K     487  1461
##                                           
##                Accuracy : 0.8589          
##                  95% CI : (0.8519, 0.8658)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5901          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9343          
##             Specificity : 0.6212          
##          Pos Pred Value : 0.8861          
##          Neg Pred Value : 0.7500          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7094          
##    Detection Prevalence : 0.8006          
##       Balanced Accuracy : 0.7778          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix
rf_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.589271e-01   5.901123e-01   8.518658e-01   8.657737e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.976785e-132   1.862103e-27
# Test-set accuracy (first element of `overall`), selected by name
rf_cf_ov_acc <- rf_cf$overall["Accuracy"]
# Per-class statistics
rf_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9343312            0.6211735            0.8860614 
##       Neg Pred Value            Precision               Recall 
##            0.7500000            0.8860614            0.9343312 
##                   F1           Prevalence       Detection Rate 
##            0.9095563            0.7592138            0.7093571 
## Detection Prevalence    Balanced Accuracy 
##            0.8005733            0.7777523
# Precision, recall and F1 (elements 5 to 7 of `byClass`), selected by name
rf_cf_pre_rec_f1 <- rf_cf$byClass[c("Precision", "Recall", "F1")]

## With TDA PCA filter: 5 intervals, 40% overlap, 5 bins
## Node 1

# Random forest fitted on the observations of PCA-Mapper vertex 1, using the
# same resampling scheme and mtry grid as the baseline model.
# `importance = TRUE` is forwarded to randomForest(); the previous spelling
# `Importance = T` did not match that argument and was silently ignored.
# NOTE(review): the vertex rows are taken from the full adult.one_hot_df4, so
# they may include rows that were held out in adult.one_hot_df4Test -- confirm
# this is intended before comparing against the baseline test metrics.
Adult_TDA_PC_5.40.5_n1_RfFit0 <- train(as.factor(adult_df1) ~ .,
                 data = tda.m_adult_5.40.5.n1.vec,
                 importance = TRUE,
                 method = 'rf',
                 trControl = fitControl,
                 tuneGrid = rfGrid, preProc = c('center', 'scale'),
                 metric = 'Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Priv.house.serv, V8.Other.relative, V8.Own.child, V14.Cambodia,
## V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands,
## V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the caret tuning summary (resampled Accuracy/Kappa for each candidate
# mtry) of the n1 TDA-assisted random forest.
Adult_TDA_PC_5.40.5_n1_RfFit0
## Random Forest 
## 
## 3373 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 2249, 2248, 2249 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa     
##     50  0.9925884  0.07366337
##    100  0.9925884  0.13347301
##    150  0.9922921  0.13293883
##    200  0.9925884  0.13347301
##    250  0.9922921  0.13293883
##    300  0.9922918  0.12810235
##    350  0.9928849  0.13990230
##    400  0.9925884  0.13347301
##    450  0.9925884  0.13347301
##    500  0.9916990  0.07215578
##    550  0.9922921  0.13293883
##    600  0.9919953  0.12354874
##    650  0.9919955  0.12756816
##    700  0.9925884  0.13347301
##    750  0.9925884  0.13347301
##    800  0.9922918  0.12810235
##    850  0.9925884  0.13347301
##    900  0.9922918  0.12571495
##    950  0.9925884  0.13347301
##   1000  0.9922918  0.12810235
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 350.
# Per-fold cross-validation metrics (Accuracy, Kappa) stored on the fit.
Adult_TDA_PC_5.40.5_n1_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9920000 0.0000000    Fold2
## 2 0.9937722 0.2209901    Fold1
## 3 0.9928826 0.1987168    Fold3
# Keep only the fold-level Accuracy column (still a one-column data frame) for
# the fold-wise comparison against ad_rf_fit_re further down.
# NOTE(review): rows are listed as Fold2, Fold1, Fold3 and the later
# subtraction is row-wise -- confirm ad_rf_fit_re uses the same fold order.
ad_tda_pc_5.40.5_n1_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n1_RfFit0[["resample"]]["Accuracy"]


# List the components stored in the fitted model object.
# NOTE(review): the recorded output shows importanceSD as NULL (length 0), so
# no permutation-importance SDs were stored for this fit -- confirm that the
# importance option reached randomForest as intended.
summary(Adult_TDA_PC_5.40.5_n1_RfFit0)
##                 Length Class      Mode     
## call               5   -none-     call     
## type               1   -none-     character
## predicted       3373   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           6746   matrix     numeric  
## oob.times       3373   -none-     numeric  
## classes            2   -none-     character
## importance       108   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y               3373   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           108   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## param              1   -none-     list
# Variable-importance plot of the 25 highest-ranked predictors for the n1
# TDA-assisted random forest. The feature count is passed by name so it cannot
# be mistaken for another positional argument; the title typo ("Assited") is
# corrected.
vip(Adult_TDA_PC_5.40.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n1_RfFit TDA-Assisted RF")

# Score the test data with the n1 TDA-assisted forest fitted on training data.
pred0 <- predict(
  object = Adult_TDA_PC_5.40.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels, then print
# the table together with its accuracy statistics.
ad_tda_pc_5.40.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_pc_5.40.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      5     0
##      >50K    7411  2352
##                                           
##                Accuracy : 0.2413          
##                  95% CI : (0.2328, 0.2499)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 3e-04           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0006742       
##             Specificity : 1.0000000       
##          Pos Pred Value : 1.0000000       
##          Neg Pred Value : 0.2409096       
##              Prevalence : 0.7592138       
##          Detection Rate : 0.0005119       
##    Detection Prevalence : 0.0005119       
##       Balanced Accuracy : 0.5003371       
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the already-computed confusion-matrix object once more (nothing is
# recomputed here; the output repeats the previous printout).
ad_tda_pc_5.40.5_n1_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      5     0
##      >50K    7411  2352
##                                           
##                Accuracy : 0.2413          
##                  95% CI : (0.2328, 0.2499)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 3e-04           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0006742       
##             Specificity : 1.0000000       
##          Pos Pred Value : 1.0000000       
##          Neg Pred Value : 0.2409096       
##              Prevalence : 0.7592138       
##          Detection Rate : 0.0005119       
##    Detection Prevalence : 0.0005119       
##       Balanced Accuracy : 0.5003371       
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the confusion matrix (named numeric vector).
ad_tda_pc_5.40.5_n1_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.2412981163   0.0003247983   0.2328399276   0.2499108150   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   1.0000000000   0.0000000000
# Keep the (named) Accuracy entry for the test-set comparison below.
ad_tda_pc_5.40.5_n1_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n1_rf_cf0[["overall"]]["Accuracy"]
# Class-wise statistics for the 'positive' class of the confusion matrix.
ad_tda_pc_5.40.5_n1_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##         0.0006742179         1.0000000000         1.0000000000 
##       Neg Pred Value            Precision               Recall 
##         0.2409095565         1.0000000000         0.0006742179 
##                   F1           Prevalence       Detection Rate 
##         0.0013475273         0.7592137592         0.0005118755 
## Detection Prevalence    Balanced Accuracy 
##         0.0005118755         0.5003371090
# Entries 5 to 7 of byClass are Precision, Recall and F1 (see the printout);
# pick them by name.
ad_tda_pc_5.40.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### 3-fold diff

# Row-wise difference of fold accuracies, ad_rf_fit_re (presumably the
# non-TDA-assisted RF) minus the n1 TDA-assisted RF: a negative entry means the
# TDA-assisted fit had the higher accuracy in that row.
diff_tda_pca_5.40.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.40.5_n1_rf_fit0_re
diff_tda_pca_5.40.5_rf_n1_3_fold
##     Accuracy
## 1 -0.1283696
## 2 -0.1361781
## 3 -0.1423956
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (the same interval
# is passed to rope() below) -- confirm against BayesianSignTest's definition.

bst_tda_pca_5.40.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; with probRight equal to 0 (as recorded above)
# the ratio evaluates to Inf.

bst_tda_pca_5.40.5_rf.n1_3_fold_odds.left <-
  bst_tda_pca_5.40.5_rf.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_rf.n1_3_fold[["probRight"]]
bst_tda_pca_5.40.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Applied to the same 3-fold accuracy differences as the sign test above; the
# result is a list with winLeft / winRope / winRight (see recorded output).
# The -0.01 / 0.01 arguments are presumably the ROPE bounds -- confirm against
# BayesianSignedRank's definition.

bsr_tda_pca_5.40.5_rf.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.40.5_rf.n1_3_fold
## $winLeft
## [1] 0.9925667
## 
## $winRope
## [1] 0.007433333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the 3-fold accuracy differences; the result is a list with
# left / rope / right (see recorded output).
# NOTE(review): the second argument (0.1) is presumably the correlation rho
# between folds, followed by the ROPE bounds. For k-fold cross-validation rho
# is conventionally 1/k, which would be 1/3 for these 3 folds rather than 0.1
# -- confirm against correlatedBayesianTtest's definition before relying on
# these probabilities.

bct_tda_pca_5.40.5_rf.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_rf.n1_3_fold
## $left
## [1] 0.9993062
## 
## $rope
## [1] 0.0001771861
## 
## $right
## [1] 0.0005166198
# Rope Plot
# Plot the share of the 3-fold accuracy differences that falls inside the
# interval [-0.01, 0.01].
plot(rope(as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),c(-0.01,0.01)))

# BayesFactor (disabled): Bayes-factor t-test on the same differences.
#bf_tda_pca_5.40.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold))
#bf_tda_pca_5.40.5_rf.n1_3_fold

# One-sample t-test of the fold differences against a mean of 0. The argument
# expression is passed inline because t.test echoes it in the "data:" line of
# its printout.
t.test(as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold)
## t = -33.43, df = 2, p-value = 0.0008936
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1531063 -0.1181892
## sample estimates:
##  mean of x 
## -0.1356478
### Test set diff
# Single test-set accuracy difference, rf_cf_ov_acc (presumably the
# non-TDA-assisted RF) minus the n1 TDA-assisted RF; a positive value means the
# TDA-assisted fit was less accurate on the test data.
diff_tda_pca_5.40.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_pc_5.40.5_n1_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n1_test
## Accuracy 
## 0.617629
## Bayesian Tests Test set diff

# Bayesian Sign Test
# NOTE(review): the input here is a single difference (one test-set accuracy
# per model), so the probabilities below rest on one observation.

bst_tda_pca_5.40.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_rf.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result above.

bst_tda_pca_5.40.5_rf.n1_test_odds.left <-
  bst_tda_pca_5.40.5_rf.n1_test[["probLeft"]] /
  bst_tda_pca_5.40.5_rf.n1_test[["probRight"]]
bst_tda_pca_5.40.5_rf.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set accuracy difference; the result is a list
# with winLeft / winRope / winRight (see recorded output).
# The -0.01 / 0.01 arguments are presumably the ROPE bounds -- confirm against
# BayesianSignedRank's definition.

bsr_tda_pca_5.40.5_rf.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_rf.n1_test),-0.01,0.01)
bsr_tda_pca_5.40.5_rf.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1570667
## 
## $winRight
## [1] 0.8429333
# Bayesian Correlated Test
# NOTE(review): the recorded result is NA for left / rope / right. The input
# is a single difference, so presumably no variance can be estimated inside
# correlatedBayesianTtest -- this test is not informative for one observation.

bct_tda_pca_5.40.5_rf.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_rf.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_pca_5.40.5_rf.n1_test))

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n1_test))
#bf_tda_pca_5.40.5_rf.n1_test

# t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n1_test))

## Node 2

# Tune a random forest (caret method "rf") on the node-2 data set
# tda.m_adult_5.40.5.n2.vec, predicting adult_df1 from all other columns.
# Predictors are centered and scaled; candidate mtry values come from rfGrid,
# resampling settings from fitControl, and the best model is chosen by
# Accuracy.
# NOTE(review): the recorded warnings ("invalid mtry: reset to within valid
# range") indicate rfGrid holds mtry values larger than the number of
# predictors, so several grid rows fit the same model -- consider capping
# rfGrid at the predictor count.
Adult_TDA_PC_5.40.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  method = "rf",
  # randomForest's option is lower-case `importance`; the capitalised
  # `Importance` is not a randomForest argument and had no effect.
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
# Show the cross-validated tuning results for the node-2 TDA-assisted random forest
print(Adult_TDA_PC_5.40.5_n2_RfFit0)
## Random Forest 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6851, 6851, 6850 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.7335542  0.4367726
##    100  0.7282992  0.4257392
##    150  0.7267423  0.4225249
##    200  0.7261581  0.4218180
##    250  0.7256720  0.4203823
##    300  0.7277149  0.4246847
##    350  0.7271316  0.4232567
##    400  0.7285910  0.4269845
##    450  0.7285909  0.4266089
##    500  0.7265476  0.4222618
##    550  0.7265474  0.4224339
##    600  0.7278130  0.4244236
##    650  0.7284939  0.4263608
##    700  0.7268396  0.4227543
##    750  0.7276179  0.4242865
##    800  0.7281044  0.4255446
##    850  0.7284933  0.4255964
##    900  0.7275207  0.4242161
##    950  0.7244067  0.4172622
##   1000  0.7281044  0.4257152
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa of the node-2 fit
print(Adult_TDA_PC_5.40.5_n2_RfFit0$resample)
##    Accuracy     Kappa Resample
## 1 0.7462774 0.4633532    Fold1
## 2 0.7302977 0.4283172    Fold3
## 3 0.7240876 0.4186475    Fold2
# Keep just the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline further down
ad_tda_pc_5.40.5_n2_rf_fit0_re <- Adult_TDA_PC_5.40.5_n2_RfFit0$resample["Accuracy"]


summary(Adult_TDA_PC_5.40.5_n2_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       10276  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           20552  matrix     numeric  
## oob.times       10276  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               10276  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot: top 25 predictors of the node-2 TDA-assisted
# random forest. `num_features` is named explicitly, and the plot title's
# "Assited" typo is corrected to "Assisted".
vip(Adult_TDA_PC_5.40.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n2_RfFit TDA-Assisted RF")

# Score the testing data with the node-2 model fitted on the training data
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the observed test labels to assess
# model fit/performance on the test data
ad_tda_pc_5.40.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.40.5_n2_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1367     7
##      >50K    6049  2345
##                                           
##                Accuracy : 0.38            
##                  95% CI : (0.3704, 0.3897)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0966          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1843          
##             Specificity : 0.9970          
##          Pos Pred Value : 0.9949          
##          Neg Pred Value : 0.2794          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1399          
##    Detection Prevalence : 0.1407          
##       Balanced Accuracy : 0.5907          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same, unmodified confusion-matrix object
print(ad_tda_pc_5.40.5_n2_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1367     7
##      >50K    6049  2345
##                                           
##                Accuracy : 0.38            
##                  95% CI : (0.3704, 0.3897)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0966          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1843          
##             Specificity : 0.9970          
##          Pos Pred Value : 0.9949          
##          Neg Pred Value : 0.2794          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1399          
##    Detection Prevalence : 0.1407          
##       Balanced Accuracy : 0.5907          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-2 confusion matrix
print(ad_tda_pc_5.40.5_n2_rf_cf0$overall)
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.38001638     0.09661567     0.37037636     0.38972799     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Overall accuracy, kept as a named length-one vector
ad_tda_pc_5.40.5_n2_rf_cf0_ov_acc <- ad_tda_pc_5.40.5_n2_rf_cf0$overall["Accuracy"]
# Per-class statistics (positive class is ' <=50K', see the matrix printout)
print(ad_tda_pc_5.40.5_n2_rf_cf0$byClass)
##          Sensitivity          Specificity       Pos Pred Value 
##            0.1843312            0.9970238            0.9949054 
##       Neg Pred Value            Precision               Recall 
##            0.2793662            0.9949054            0.1843312 
##                   F1           Prevalence       Detection Rate 
##            0.3110353            0.7592138            0.1399468 
## Detection Prevalence    Balanced Accuracy 
##            0.1406634            0.5906775
# Precision, Recall and F1 are elements 5 to 7 of byClass; select them by name
ad_tda_pc_5.40.5_n2_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.40.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted RF vs. TDA-assisted RF classifiers
# Every test below works on the per-fold accuracy difference
# (ad_rf_fit_re minus the node-2 TDA-assisted accuracies), so positive values
# mean the first classifier scored higher. ad_rf_fit_re is defined earlier in
# the file; presumably it holds the non-TDA RF's per-fold accuracies -- confirm.

### 3-fold diff

# Data-frame subtraction pairs rows by position, not by fold label.
# NOTE(review): the node-2 resample table lists folds as Fold1, Fold3, Fold2;
# verify ad_rf_fit_re uses the same row order before reading these as paired
# differences.
diff_tda_pca_5.40.5_rf_n2_3_fold<-(ad_rf_fit_re-ad_tda_pc_5.40.5_n2_rf_fit0_re)
diff_tda_pca_5.40.5_rf_n2_3_fold
##    Accuracy
## 1 0.1173530
## 2 0.1272964
## 3 0.1263994
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The trailing -0.01, 0.01 are presumably the lower/upper ROPE bounds (the same
# interval is passed to rope() below) -- confirm against the helper definition.

bst_tda_pca_5.40.5_rf.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.40.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: ratio probLeft / probRight from the sign test

bst_tda_pca_5.40.5_rf.n2_3_fold_odds.left<-bst_tda_pca_5.40.5_rf.n2_3_fold$probLeft/bst_tda_pca_5.40.5_rf.n2_3_fold$probRight
bst_tda_pca_5.40.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test (same differences and bounds as the sign test)

bsr_tda_pca_5.40.5_rf.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.40.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.01003333
## 
## $winRight
## [1] 0.9899667
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the fold-correlation
# parameter; if so, a 3-fold design would usually call for 1/3 -- confirm
# against the helper definition before relying on these probabilities.

bct_tda_pca_5.40.5_rf.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_rf.n2_3_fold
## $left
## [1] 0.000375751
## 
## $rope
## [1] 0.0001436159
## 
## $right
## [1] 0.9994806
# Rope Plot: plot the rope() result for the differences over [-0.01, 0.01]
plot(rope(as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),c(-0.01,0.01)))

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold))
#bf_tda_pca_5.40.5_rf.n2_3_fold

# Frequentist one-sample t-test on the same three differences (df = 2)
t.test(as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold)
## t = 38.949, df = 2, p-value = 0.0006585
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1100197 0.1373461
## sample estimates:
## mean of x 
## 0.1236829
### Test set diff
# Difference in overall test-set accuracy: rf_cf_ov_acc (defined earlier;
# presumably the non-TDA RF's test accuracy -- confirm) minus the node-2
# TDA-assisted accuracy.
# NOTE(review): this is a single value, so the tests below run on one
# observation; the correlated test returns NA for all three probabilities
# (see its output), and the rope plot / t-test are commented out.
diff_tda_pca_5.40.5_rf.n2_test<-(rf_cf_ov_acc-ad_tda_pc_5.40.5_n2_rf_cf0_ov_acc)
diff_tda_pca_5.40.5_rf.n2_test
##  Accuracy 
## 0.4789107
## Bayesian Tests Test set diff

# Bayesian Sign Test
# The trailing -0.01, 0.01 are presumably the lower/upper ROPE bounds -- confirm
# against the helper definition.

bst_tda_pca_5.40.5_rf.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.40.5_rf.n2_test),-0.01,0.01)
bst_tda_pca_5.40.5_rf.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio probLeft / probRight from the sign test

bst_tda_pca_5.40.5_rf.n2_test_odds.left<-bst_tda_pca_5.40.5_rf.n2_test$probLeft/bst_tda_pca_5.40.5_rf.n2_test$probRight
bst_tda_pca_5.40.5_rf.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test (same single difference and bounds)

bsr_tda_pca_5.40.5_rf.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_rf.n2_test),-0.01,0.01)
bsr_tda_pca_5.40.5_rf.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1592667
## 
## $winRight
## [1] 0.8407333
# Bayesian Correlated Test
# Returns NA for left/rope/right on this single-value input (see output below).

bct_tda_pca_5.40.5_rf.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_rf.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(as.matrix(diff_tda_pca_5.40.5_rf.n2_test),c(-0.01,0.01)))

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n2_test))
#bf_tda_pca_5.40.5_rf.n2_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n2_test))

##Node3

# Fit the node-3 TDA-assisted random forest with caret: predictors are
# centered and scaled, mtry is tuned over rfGrid under the resampling scheme
# in fitControl, and the best model is chosen by Accuracy.
#
# R argument names are case-sensitive: the earlier `Importance = T` never
# matched randomForest's `importance` argument and was swallowed by `...`,
# so it is now spelled `importance = TRUE`. `preProc` relied on partial
# matching and is written out as `preProcess`.
#
# NOTE(review): the printed results list mtry values from 50 to 1000 for 108
# predictors, which is what triggers the repeated "invalid mtry: reset to
# within valid range" warnings; consider capping the grid where rfGrid is
# defined.
Adult_TDA_PC_5.40.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Show the tuning summary of the node-3 TDA-assisted random forest fit.
print(Adult_TDA_PC_5.40.5_n3_RfFit0)
## Random Forest 
## 
## 11563 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7708, 7709, 7709 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8753780  0.5837723
##    100  0.8715727  0.5762723
##    150  0.8713133  0.5761533
##    200  0.8708808  0.5754117
##    250  0.8708812  0.5751869
##    300  0.8707082  0.5742771
##    350  0.8709674  0.5753250
##    400  0.8721782  0.5784881
##    450  0.8698431  0.5720834
##    500  0.8711402  0.5753669
##    550  0.8713133  0.5759419
##    600  0.8705349  0.5735405
##    650  0.8713133  0.5772845
##    700  0.8708808  0.5745360
##    750  0.8711404  0.5760373
##    800  0.8709675  0.5744506
##    850  0.8707080  0.5742496
##    900  0.8706215  0.5736133
##    950  0.8707944  0.5742088
##   1000  0.8706215  0.5743404
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa table stored on the node-3 fit.
Adult_TDA_PC_5.40.5_n3_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8793774 0.5999564    Fold1
## 2 0.8692268 0.5626484    Fold3
## 3 0.8775298 0.5887122    Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline further down.
ad_tda_pc_5.40.5_n3_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n3_RfFit0[["resample"]]["Accuracy"]


# List the components of the node-3 fit.
Adult_TDA_PC_5.40.5_n3_RfFit0 |> summary()
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       11563  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           23126  matrix     numeric  
## oob.times       11563  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               11563  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the node-3 fit, limited to the top 25
# features. Title typo fixed ("Assited" -> "Assisted").
vip(Adult_TDA_PC_5.40.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n3_RfFit TDA-Assisted RF")

# Score the testing data with the node-3 TDA-assisted random forest.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and
# display the resulting statistics.
# NOTE(review): the matrix recorded below has Kappa ~ 1e-04 and an accuracy
# (0.529) under the no-information rate (0.759), i.e. the predictions look
# unrelated to the labels -- confirm the test features match what this model
# was trained on.
ad_tda_pc_5.40.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4125  1308
##      >50K    3291  1044
##                                           
##                Accuracy : 0.5292          
##                  95% CI : (0.5192, 0.5391)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 1e-04           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5562          
##             Specificity : 0.4439          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4223          
##    Detection Prevalence : 0.5562          
##       Balanced Accuracy : 0.5001          
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the node-3 confusion matrix again (same object as printed above).
print(ad_tda_pc_5.40.5_n3_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4125  1308
##      >50K    3291  1044
##                                           
##                Accuracy : 0.5292          
##                  95% CI : (0.5192, 0.5391)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 1e-04           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5562          
##             Specificity : 0.4439          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4223          
##    Detection Prevalence : 0.5562          
##       Balanced Accuracy : 0.5001          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-3 confusion matrix.
ad_tda_pc_5.40.5_n3_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.291769e-01   8.333560e-05   5.192196e-01   5.391168e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  9.019515e-188
# Keep the overall accuracy (first entry) for the test-set comparison.
ad_tda_pc_5.40.5_n3_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_pc_5.40.5_n3_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.5562298            0.4438776            0.7592490 
##       Neg Pred Value            Precision               Recall 
##            0.2408304            0.7592490            0.5562298 
##                   F1           Prevalence       Detection Rate 
##            0.6420733            0.7592138            0.4222973 
## Detection Prevalence    Balanced Accuracy 
##            0.5562039            0.5000537
# Entries 5 to 7 of byClass are Precision, Recall and F1.
ad_tda_pc_5.40.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian comparison: non-TDA-assisted RF vs. TDA-assisted RF

### 3-fold diff

# Fold-wise accuracy difference, baseline minus node-3 TDA-assisted fit;
# rows are paired by position.
# NOTE(review): confirm both resample tables list the folds in the same order.
diff_tda_pca_5.40.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.40.5_n3_rf_fit0_re
diff_tda_pca_5.40.5_rf_n3_3_fold
##      Accuracy
## 1 -0.01574705
## 2 -0.01163267
## 3 -0.02704287
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences; the trailing -0.01 / 0.01
# are presumably the ROPE limits (the result carries a probRope entry).
bst_tda_pca_5.40.5_rf.n3_3_fold <-
  as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold) |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_pca_5.40.5_rf.n3_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (probLeft / probRight);
# this is Inf whenever probRight is 0.
bst_tda_pca_5.40.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n3_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences.
bsr_tda_pca_5.40.5_rf.n3_3_fold <-
  as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold) |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_pca_5.40.5_rf.n3_3_fold
## $winLeft
## [1] 0.9109333
## 
## $winRope
## [1] 0.08906667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-3 fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the correlation used
# by the test -- confirm it is the intended value for 3-fold CV.
bct_tda_pca_5.40.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold),
  0.1, -0.01, 0.01
)
# Print the node-3 result. This line previously printed the node-2 object
# (bct_tda_pca_5.40.5_rf.n2_3_fold), so the output recorded below belongs to
# node 2 and must be regenerated.
bct_tda_pca_5.40.5_rf.n3_3_fold
## $left
## [1] 0.000375751
## 
## $rope
## [1] 0.0001436159
## 
## $right
## [1] 0.9994806
# ROPE plot of the fold-wise differences with limits -0.01 / 0.01.
as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold) |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold))
#bf_tda_pca_5.40.5_rf.n3_3_fold

# One-sample t-test of the fold-wise differences against a mean of 0.
as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold) |> t.test()
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold)
## t = -3.9379, df = 2, p-value = 0.05885
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.037962087  0.001680357
## sample estimates:
##   mean of x 
## -0.01814086
### Test set diff
# Test-set accuracy difference, baseline minus node-3 TDA-assisted fit
# (a single value).
diff_tda_pca_5.40.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_pc_5.40.5_n3_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n3_test
##  Accuracy 
## 0.3297502
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference.
bst_tda_pca_5.40.5_rf.n3_test <-
  as.matrix(diff_tda_pca_5.40.5_rf.n3_test) |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_pca_5.40.5_rf.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set sign test.
bst_tda_pca_5.40.5_rf.n3_test_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.40.5_rf.n3_test <-
  as.matrix(diff_tda_pca_5.40.5_rf.n3_test) |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_pca_5.40.5_rf.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1555
## 
## $winRight
## [1] 0.8445
# Bayesian correlated t-test on the single test-set difference; the output
# recorded below is NA for all three regions.
bct_tda_pca_5.40.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n3_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_rf.n3_test))

#BayesFactor
#bf_tda_pca_5.40.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n3_test)) #bf_tda_pca_5.40.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n3_test))

## Node4

# Fit the node-4 TDA-assisted random forest through caret: the outcome is
# adult_df1 as a factor, all other columns are predictors, predictors are
# centered and scaled, and the best mtry from rfGrid is picked by Accuracy.
# Changes: `T` -> `TRUE` (T can be reassigned) and `preProc` spelled out as
# `preProcess` instead of relying on partial argument matching.
# NOTE(review): `Importance` (capital I) is not randomForest's `importance`
# argument; the summary printed further down shows importanceSD as NULL, so
# the flag appears to have no effect. It is left unchanged here so this fit
# stays comparable with the other node fits -- switch all of them to
# `importance = TRUE` together if permutation importance is wanted.
# NOTE(review): the results table below lists mtry values up to 1000 for 108
# predictors and randomForest warns "invalid mtry: reset to within valid
# range", so many grid rows likely duplicate each other -- check rfGrid.
Adult_TDA_PC_5.40.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  Importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
# Show the tuning summary of the node-4 TDA-assisted random forest fit.
print(Adult_TDA_PC_5.40.5_n4_RfFit0)
## Random Forest 
## 
## 14818 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9878, 9879, 9879 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.9644351  0.3632696
##    100  0.9630854  0.3515652
##    150  0.9631529  0.3550414
##    200  0.9632204  0.3579904
##    250  0.9631529  0.3549535
##    300  0.9634228  0.3568652
##    350  0.9632204  0.3567692
##    400  0.9630854  0.3545482
##    450  0.9634228  0.3598996
##    500  0.9632878  0.3561166
##    550  0.9628829  0.3543434
##    600  0.9631529  0.3536049
##    650  0.9631529  0.3549535
##    700  0.9629504  0.3561752
##    750  0.9633554  0.3576292
##    800  0.9632878  0.3599696
##    850  0.9633553  0.3565252
##    900  0.9631529  0.3551875
##    950  0.9631529  0.3550018
##   1000  0.9631529  0.3549273
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa table stored on the node-4 fit.
Adult_TDA_PC_5.40.5_n4_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.9655870 0.3879696    Fold1
## 2 0.9629480 0.3162524    Fold3
## 3 0.9647702 0.3855867    Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline further down.
ad_tda_pc_5.40.5_n4_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n4_RfFit0[["resample"]]["Accuracy"]


# List the components of the node-4 fit.
Adult_TDA_PC_5.40.5_n4_RfFit0 |> summary()
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       14818  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           29636  matrix     numeric  
## oob.times       14818  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               14818  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the node-4 fit, limited to the top 25
# features. Title typo fixed ("Assited" -> "Assisted").
vip(Adult_TDA_PC_5.40.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n4_RfFit TDA-Assisted RF")

# Score the testing data with the node-4 TDA-assisted random forest.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and
# display the resulting statistics.
ad_tda_pc_5.40.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7412  1780
##      >50K       4   572
##                                          
##                Accuracy : 0.8174         
##                  95% CI : (0.8096, 0.825)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.3269         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9995         
##             Specificity : 0.2432         
##          Pos Pred Value : 0.8064         
##          Neg Pred Value : 0.9931         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7588         
##    Detection Prevalence : 0.9410         
##       Balanced Accuracy : 0.6213         
##                                          
##        'Positive' Class :  <=50K         
## 
# Display the node-4 confusion matrix again (same object as printed above).
print(ad_tda_pc_5.40.5_n4_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7412  1780
##      >50K       4   572
##                                          
##                Accuracy : 0.8174         
##                  95% CI : (0.8096, 0.825)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.3269         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9995         
##             Specificity : 0.2432         
##          Pos Pred Value : 0.8064         
##          Neg Pred Value : 0.9931         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7588         
##    Detection Prevalence : 0.9410         
##       Balanced Accuracy : 0.6213         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall test-set statistics of the node-4 confusion matrix.
ad_tda_pc_5.40.5_n4_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.173628e-01   3.269485e-01   8.095553e-01   8.249806e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   6.438822e-44   0.000000e+00
# Keep the overall accuracy (first entry) for the test-set comparison.
ad_tda_pc_5.40.5_n4_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_pc_5.40.5_n4_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9994606            0.2431973            0.8063534 
##       Neg Pred Value            Precision               Recall 
##            0.9930556            0.8063534            0.9994606 
##                   F1           Prevalence       Detection Rate 
##            0.8925819            0.7592138            0.7588043 
## Detection Prevalence    Balanced Accuracy 
##            0.9410319            0.6213290
# Entries 5 to 7 of byClass are Precision, Recall and F1.
ad_tda_pc_5.40.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian comparison: non-TDA-assisted RF vs. TDA-assisted RF

### 3-fold diff

# Fold-wise accuracy difference, baseline minus node-4 TDA-assisted fit;
# rows are paired by position.
# NOTE(review): confirm both resample tables list the folds in the same order.
diff_tda_pca_5.40.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.40.5_n4_rf_fit0_re
diff_tda_pca_5.40.5_rf_n4_3_fold
##     Accuracy
## 1 -0.1019567
## 2 -0.1053539
## 3 -0.1142832
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences; the trailing -0.01 / 0.01
# are presumably the ROPE limits (the result carries a probRope entry).
bst_tda_pca_5.40.5_rf.n4_3_fold <-
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold) |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_pca_5.40.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (probLeft / probRight);
# this is Inf whenever probRight is 0.
bst_tda_pca_5.40.5_rf.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences.
bsr_tda_pca_5.40.5_rf.n4_3_fold <-
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold) |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_pca_5.40.5_rf.n4_3_fold
## $winLeft
## [1] 0.9906
## 
## $winRope
## [1] 0.0094
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-4 fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the correlation used
# by the test -- confirm it is the intended value for 3-fold CV.
bct_tda_pca_5.40.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_rf.n4_3_fold
## $left
## [1] 0.9990492
## 
## $rope
## [1] 0.0002962323
## 
## $right
## [1] 0.0006545455
# ROPE plot of the fold-wise differences with limits -0.01 / 0.01.
as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold) |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold))
#bf_tda_pca_5.40.5_rf.n4_3_fold

# One-sample t-test of the fold-wise differences against a mean of 0.
as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold) |> t.test()
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold)
## t = -29.162, df = 2, p-value = 0.001174
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.12301398 -0.09138185
## sample estimates:
##  mean of x 
## -0.1071979
### Test set diff
# Test-set accuracy gap: rf_cf_ov_acc minus the node-4 TDA-assisted accuracy.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_rf.n4_test <- rf_cf_ov_acc - ad_tda_pc_5.40.5_n4_rf_cf0_ov_acc
print(diff_tda_pca_5.40.5_rf.n4_test)
##   Accuracy 
## 0.04156429
## Bayesian tests on the test-set difference, ROPE = [-0.01, 0.01]

# Bayesian sign test

bst_tda_pca_5.40.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n4_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.40.5_rf.n4_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.40.5_rf.n4_test_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n4_test,
  probLeft / probRight
)
print(bst_tda_pca_5.40.5_rf.n4_test_odds.left)
## [1] 0
# Bayesian signed-rank test

bsr_tda_pca_5.40.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf.n4_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.40.5_rf.n4_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1594667
## 
## $winRight
## [1] 0.8405333
# Bayesian correlated t-test; on this single difference it returns NA for
# all three regions (see the recorded result that follows).

bct_tda_pca_5.40.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.40.5_rf.n4_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_rf.n4_test))

#BayesFactor
#bf_tda_pca_5.40.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n4_test)) #bf_tda_pca_5.40.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n4_test))

##Node5

# Fit the node-5 TDA-assisted random forest with caret, tuning mtry over
# rfGrid under the resampling scheme in fitControl and selecting on Accuracy.
# Predictors are centred and scaled before fitting.
# NOTE(review): the recorded run warns "invalid mtry: reset to within valid
# range" and reports 108 predictors, so most rfGrid values exceed the
# predictor count - consider capping the grid where rfGrid is defined.
Adult_TDA_PC_5.40.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  # randomForest's argument is lower-case `importance`; the previous spelling
  # `Importance` matched nothing and was silently absorbed by `...`, so
  # permutation importance was never computed.
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  # Spelled out in full instead of relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
# Show the tuning results of the node-5 fit.
print(Adult_TDA_PC_5.40.5_n5_RfFit0)
## Random Forest 
## 
## 12081 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8054, 8054, 8054 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa        
##     50  0.9996689   0.0000000000
##    100  0.9995034  -0.0001655903
##    150  0.9994206  -0.0001931979
##    200  0.9994206  -0.0001931979
##    250  0.9994206  -0.0001931979
##    300  0.9994206  -0.0001931979
##    350  0.9994206  -0.0001931979
##    400  0.9995034  -0.0001655903
##    450  0.9995034  -0.0001655903
##    500  0.9993378  -0.0002070033
##    550  0.9995034  -0.0001655903
##    600  0.9995034  -0.0001655903
##    650  0.9994206  -0.0001931979
##    700  0.9994206  -0.0001931979
##    750  0.9994206  -0.0001931979
##    800  0.9994206  -0.0001931979
##    850  0.9994206  -0.0001931979
##    900  0.9994206  -0.0001931979
##    950  0.9995034  -0.0001655903
##   1000  0.9994206  -0.0001931979
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold performance of the selected node-5 model.
# NOTE(review): the rows are listed as Fold1, Fold3, Fold2, not in fold order;
# anything that later pairs these rows by position depends on that order.
print(Adult_TDA_PC_5.40.5_n5_RfFit0[["resample"]])
##    Accuracy Kappa Resample
## 1 0.9997517     0    Fold1
## 2 0.9995034     0    Fold3
## 3 0.9997517     0    Fold2
# Keep only the Accuracy column, still as a one-column data frame.
ad_tda_pc_5.40.5_n5_rf_fit0_re <- Adult_TDA_PC_5.40.5_n5_RfFit0[["resample"]]["Accuracy"]

# Component overview of the fitted object.
print(summary(Adult_TDA_PC_5.40.5_n5_RfFit0))
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       12081  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           24162  matrix     numeric  
## oob.times       12081  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               12081  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors of the node-5 fit.
# The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_PC_5.40.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n5_RfFit TDA-Assisted RF")

# Score the held-out test rows with the node-5 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels.
ad_tda_pc_5.40.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.40.5_n5_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the node-5 confusion matrix (repeats the listing shown just before).
print(ad_tda_pc_5.40.5_n5_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-5 confusion matrix.
print(ad_tda_pc_5.40.5_n5_rf_cf0[["overall"]])
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Accuracy is the first entry of `overall`; keep it as a named scalar.
ad_tda_pc_5.40.5_n5_rf_cf0_ov_acc <- ad_tda_pc_5.40.5_n5_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics; Neg Pred Value is NaN because no row was predicted
# as the second class (see the confusion matrix listing).
print(ad_tda_pc_5.40.5_n5_rf_cf0[["byClass"]])
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, Recall and F1 are entries 5 to 7 of byClass.
ad_tda_pc_5.40.5_n5_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.40.5_n5_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Row-wise accuracy gap between two 3-fold resample tables: ad_rf_fit_re
# (presumably the non-TDA RF) minus the node-5 TDA-assisted fit. A negative
# entry means the node-5 model scored higher on that row.
# NOTE(review): the subtraction pairs rows by position - confirm that both
# tables list their folds in the same order.
diff_tda_pca_5.40.5_rf_n5_3_fold <- ad_rf_fit_re - ad_tda_pc_5.40.5_n5_rf_fit0_re
print(diff_tda_pca_5.40.5_rf_n5_3_fold)
##     Accuracy
## 1 -0.1361213
## 2 -0.1419092
## 3 -0.1492647
## Bayesian tests on the 3-fold differences, ROPE = [-0.01, 0.01]

# Bayesian sign test

bst_tda_pca_5.40.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.40.5_rf.n5_3_fold)
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test; this is Inf whenever
# probRight is 0, as in the result above.

bst_tda_pca_5.40.5_rf.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n5_3_fold,
  probLeft / probRight
)
print(bst_tda_pca_5.40.5_rf.n5_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test

bsr_tda_pca_5.40.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.40.5_rf.n5_3_fold)
## $winLeft
## [1] 0.9907333
## 
## $winRope
## [1] 0.009266667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test
# NOTE(review): 0.1 is passed as the second argument (presumably the fold
# correlation rho); with 3-fold resampling 1/3 may be intended - confirm.

bct_tda_pca_5.40.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.40.5_rf.n5_3_fold)
## $left
## [1] 0.9994511
## 
## $rope
## [1] 0.000134423
## 
## $right
## [1] 0.0004144829
# ROPE plot of the differences over the same [-0.01, 0.01] interval; the
# data frame is passed to rope() as-is, without as.matrix().
plot(
  rope(
    diff_tda_pca_5.40.5_rf_n5_3_fold,
    c(-0.01, 0.01)
  )
)

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold))
#bf_tda_pca_5.40.5_rf.n5_3_fold

# Frequentist one-sample t-test of the differences against a mean of 0
t.test(as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold)
## t = -37.451, df = 2, p-value = 0.0007122
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1587954 -0.1260681
## sample estimates:
##  mean of x 
## -0.1424318
### Test set diff
# Test-set accuracy gap: rf_cf_ov_acc minus the node-5 TDA-assisted accuracy.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_rf.n5_test <- rf_cf_ov_acc - ad_tda_pc_5.40.5_n5_rf_cf0_ov_acc
print(diff_tda_pca_5.40.5_rf.n5_test)
##   Accuracy 
## 0.09971335
## Bayesian tests on the test-set difference, ROPE = [-0.01, 0.01]

# Bayesian sign test

bst_tda_pca_5.40.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n5_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.40.5_rf.n5_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.40.5_rf.n5_test_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n5_test,
  probLeft / probRight
)
print(bst_tda_pca_5.40.5_rf.n5_test_odds.left)
## [1] 0
# Bayesian signed-rank test

bsr_tda_pca_5.40.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf.n5_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.40.5_rf.n5_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1600667
## 
## $winRight
## [1] 0.8399333
# Bayesian correlated t-test; on this single difference it returns NA for
# all three regions (see the recorded result that follows).

bct_tda_pca_5.40.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.40.5_rf.n5_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_rf.n5_test))

#BayesFactor
#bf_tda_pca_5.40.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n5_test)) #bf_tda_pca_5.40.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n5_test))

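##With TDA KDE filter 5 intervals, 50% overlap, 5 bins (NOTE: the objects in this section are named 5.40.5, which suggests 40% overlap - confirm which value is correct)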
##Node1

# Fit the node-1 TDA/KDE random forest with caret::train(). `fitControl`
# (resampling) and `rfGrid` (mtry candidates) are defined outside this
# section; predictors are centred and scaled, and the final mtry is the one
# with the highest resampled accuracy.
#
# Fix: the importance flag must be spelled `importance` (lower case). R
# argument names are case-sensitive, so `Importance = T` was swallowed by
# `...` and ignored -- the fitted forest reports an empty `importanceSD`.
# `TRUE` replaces `T` (which can be reassigned), and `preProcess` is spelled
# out instead of relying on partial matching of `preProc`.
#
# NOTE(review): the warnings below show mtry being reset to the valid range
# for most grid values (the fit reports 108 predictors while `rfGrid` runs up
# to 1000), so many grid points refit the same model; consider trimming
# `rfGrid` where it is defined.
Adult_TDA_KDE_5.40.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  method = "rf",
  metric = "Accuracy",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  importance = TRUE
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
# Show the resampling results for every mtry candidate and the selected value.
print(Adult_TDA_KDE_5.40.5_n1_RfFit0)
## Random Forest 
## 
## 11838 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7892, 7891, 7893 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8636600  0.6334583
##    100  0.8618857  0.6292076
##    150  0.8610410  0.6278349
##    200  0.8604498  0.6259625
##    250  0.8612101  0.6274137
##    300  0.8613790  0.6283440
##    350  0.8621391  0.6304952
##    400  0.8616323  0.6293076
##    450  0.8619699  0.6296452
##    500  0.8617169  0.6289204
##    550  0.8628996  0.6322507
##    600  0.8618859  0.6295355
##    650  0.8610410  0.6271913
##    700  0.8611255  0.6277824
##    750  0.8611257  0.6278591
##    800  0.8626458  0.6316204
##    850  0.8602808  0.6251932
##    900  0.8606187  0.6262678
##    950  0.8612944  0.6278230
##   1000  0.8606189  0.6254455
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected model.
Adult_TDA_KDE_5.40.5_n1_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8621389 0.6302650    Fold1
## 2 0.8676806 0.6442454    Fold3
## 3 0.8611604 0.6258646    Fold2

# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison.
# NOTE(review): the rows come back as Fold1, Fold3, Fold2; the row-wise
# subtraction further down only pairs like with like if the other resample
# table is in the same order -- confirm.
ad_tda_kde_5.40.5_n1_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n1_RfFit0[["resample"]]["Accuracy"]

# Component-by-component summary of the fitted object.
summary(Adult_TDA_KDE_5.40.5_n1_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       11838  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           23676  matrix     numeric  
## oob.times       11838  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               11838  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot limited to 25 features.
# Fix: title typo "TDA-Assited" -> "TDA-Assisted".
vip(Adult_TDA_KDE_5.40.5_n1_RfFit0, 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n1_RfFit TDA-Assisted RF")

# Predict the rows of adult.one_hot_df4Test with the node-1 model.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the `adult_df1` labels of the
# test data.
# NOTE(review): the test accuracy printed below (0.9074) is higher than the
# 3-fold CV accuracy of the selected model (0.8637); check that the test
# rows do not overlap with the node-1 training data.
ad_tda_kde_5.40.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7164   653
##      >50K     252  1699
##                                          
##                Accuracy : 0.9074         
##                  95% CI : (0.9014, 0.913)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.7309         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9660         
##             Specificity : 0.7224         
##          Pos Pred Value : 0.9165         
##          Neg Pred Value : 0.8708         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7334         
##    Detection Prevalence : 0.8003         
##       Balanced Accuracy : 0.8442         
##                                          
##        'Positive' Class :  <=50K         
## 
# Print the confusion matrix again (same object as printed right after it
# was created).
print(ad_tda_kde_5.40.5_n1_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7164   653
##      >50K     252  1699
##                                          
##                Accuracy : 0.9074         
##                  95% CI : (0.9014, 0.913)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.7309         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9660         
##             Specificity : 0.7224         
##          Pos Pred Value : 0.9165         
##          Neg Pred Value : 0.8708         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7334         
##    Detection Prevalence : 0.8003         
##       Balanced Accuracy : 0.8442         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the test-set confusion matrix.
ad_tda_kde_5.40.5_n1_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.073505e-01   7.309312e-01   9.014274e-01   9.130295e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.000598e-313   2.427221e-40

# Keep the Accuracy entry (first element, still named) for the test-set
# comparison.
ad_tda_kde_5.40.5_n1_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_rf_cf0[["overall"]]["Accuracy"]

# Per-class statistics of the same confusion matrix.
ad_tda_kde_5.40.5_n1_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9660194            0.7223639            0.9164641 
##       Neg Pred Value            Precision               Recall 
##            0.8708355            0.9164641            0.9660194 
##                   F1           Prevalence       Detection Rate 
##            0.9405895            0.7592138            0.7334152 
## Detection Prevalence    Balanced Accuracy 
##            0.8002662            0.8441917
# Precision, Recall and F1 are elements 5 to 7 of the per-class statistics.
ad_tda_kde_5.40.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Row-wise difference of the per-fold accuracies: `ad_rf_fit_re` minus the
# node-1 TDA/KDE fit.
diff_tda_kde_5.40.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n1_rf_fit0_re
diff_tda_kde_5.40.5_rf_n1_3_fold
##       Accuracy
## 1  0.001491506
## 2 -0.010086505
## 3 -0.010673405
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold-wise differences; -0.01 and 0.01 are
# presumably the ROPE limits.
bst_tda_kde_5.40.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold), -0.01, 0.01
)
print(bst_tda_kde_5.40.5_rf.n1_3_fold)
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight). The printed result is Inf here because
# probRight is 0.
bst_tda_kde_5.40.5_rf.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_rf.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n1_3_fold[["probRight"]]
print(bst_tda_kde_5.40.5_rf.n1_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test on the three fold-wise differences, called with
# the same -0.01 / 0.01 limits as the sign test.
bsr_tda_kde_5.40.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold), -0.01, 0.01
)
print(bsr_tda_kde_5.40.5_rf.n1_3_fold)
## $winLeft
## [1] 0.3269333
## 
## $winRope
## [1] 0.6730667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the three fold-wise accuracy differences,
# with -0.01 and 0.01 as the ROPE limits.
# The second positional argument is the between-fold correlation rho (the
# $left value printed below reproduces exactly under that reading).
# Fix: for k-fold cross-validation the usual heuristic is rho = 1 / k, so the
# 3-fold resampling used here needs 1 / 3; the former 0.1 is the 10-fold
# value.
# NOTE: the output printed below was produced with rho = 0.1 -- re-run to
# refresh it.
bct_tda_kde_5.40.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold), 1 / 3, -0.01, 0.01
)
bct_tda_kde_5.40.5_rf.n1_3_fold
## $left
## [1] 0.2580109
## 
## $rope
## [1] 0.7072094
## 
## $right
## [1] 0.03477973
# ROPE plot: pass the 3-fold differences and the interval c(-0.01, 0.01)
# to rope(), then plot the result.
diff_tda_kde_5.40.5_rf_n1_3_fold |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold))
#bf_tda_kde_5.40.5_rf.n1_3_fold

# One-sample t-test of the 3-fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold)
## t = -1.6216, df = 2, p-value = 0.2463
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02346466  0.01061905
## sample estimates:
##    mean of x 
## -0.006422801
### Test set diff
# `rf_cf_ov_acc` minus the overall test accuracy of the node-1 TDA/KDE model;
# the result is a single named value.
diff_tda_kde_5.40.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n1_rf_cf0_ov_acc
print(diff_tda_kde_5.40.5_rf.n1_test)
##    Accuracy 
## -0.04842342
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference; -0.01 and 0.01 are
# presumably the ROPE limits.
bst_tda_kde_5.40.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n1_test), -0.01, 0.01
)
print(bst_tda_kde_5.40.5_rf.n1_test)
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight). The printed result is Inf here because
# probRight is 0.
bst_tda_kde_5.40.5_rf.n1_test_odds.left <-
  bst_tda_kde_5.40.5_rf.n1_test[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n1_test[["probRight"]]
print(bst_tda_kde_5.40.5_rf.n1_test_odds.left)
## [1] Inf

# Bayesian signed-rank test on the same test-set difference, called with
# the same -0.01 / 0.01 limits.
bsr_tda_kde_5.40.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n1_test), -0.01, 0.01
)
print(bsr_tda_kde_5.40.5_rf.n1_test)
## $winLeft
## [1] 0.8396
## 
## $winRope
## [1] 0.1604
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference
# (arguments: data, 0.1, then -0.01 and 0.01).
# NOTE(review): every component printed below is NA -- presumably because
# the input holds a single value, so no spread can be estimated; confirm
# whether this call is worth keeping.
bct_tda_kde_5.40.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n1_test), 0.1, -0.01, 0.01
)
print(bct_tda_kde_5.40.5_rf.n1_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n1_test))

#BayesFactor
#bf_tda_kde_5.40.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n1_test)) #bf_tda_kde_5.40.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n1_test))

## Node2

# Tune a random forest with caret on tda.m_kde_adult_5.40.5.n2.vec, using the
# resampling scheme in fitControl and the mtry candidates in rfGrid, selecting
# the model with the highest Accuracy. Predictors are centered and scaled.
#
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# The argument name is lower-case: the previous spelling `Importance = T` did
# not match any randomForest() argument and was silently ignored, so
# permutation importance was never computed. Enabling it makes each fit
# slower and changes what the variable-importance plot is based on.
#
# NOTE(review): the recorded fit reports 108 predictors while the tuning table
# lists mtry values up to 1000, which is what triggers the repeated
# "invalid mtry: reset to within valid range" warnings. Consider capping the
# mtry values where rfGrid is defined.
Adult_TDA_KDE_5.40.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n2.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
# Print the caret tuning summary (resampled Accuracy/Kappa per mtry value and
# the selected mtry) for the node-2 fit.
Adult_TDA_KDE_5.40.5_n2_RfFit0
## Random Forest 
## 
## 11203 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7469, 7469, 7468 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8457554  0.6047366
##    100  0.8437920  0.6001984
##    150  0.8432564  0.5986506
##    200  0.8438811  0.6011784
##    250  0.8432563  0.5995230
##    300  0.8422745  0.5964219
##    350  0.8437919  0.6008044
##    400  0.8435241  0.6001002
##    450  0.8419176  0.5964063
##    500  0.8420959  0.5963587
##    550  0.8421852  0.5972741
##    600  0.8425421  0.5972713
##    650  0.8427208  0.5981473
##    700  0.8430777  0.5992367
##    750  0.8431669  0.5988447
##    800  0.8441490  0.6009856
##    850  0.8426316  0.5982195
##    900  0.8426314  0.5982829
##    950  0.8433456  0.6005848
##   1000  0.8423636  0.5973269
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling results (Accuracy, Kappa, fold label) of the final model.
Adult_TDA_KDE_5.40.5_n2_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8393144 0.5891400    Fold1
## 2 0.8476573 0.6098193    Fold3
## 3 0.8502946 0.6152507    Fold2

# Keep only the Accuracy column as a one-column data frame.
# NOTE(review): the rows are in the order Fold1, Fold3, Fold2. The later
# subtraction against ad_rf_fit_re pairs rows by position, so confirm that
# object uses the same fold order.
ad_tda_KDE_5.40.5_n2_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n2_RfFit0[["resample"]]["Accuracy"]

# Component overview of the fitted model object.
summary(Adult_TDA_KDE_5.40.5_n2_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       11203  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           22406  matrix     numeric  
## oob.times       11203  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               11203  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the top 25 features for the node-2 fit.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_KDE_5.40.5_n2_RfFit0, 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n2_RfFit TDA-Assisted RF")

# Predict the outcome for the test data with the node-2 model.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels.
# `reference` is named explicitly rather than passed positionally after a
# named `data` argument.
ad_tda_kde_5.40.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7129   623
##      >50K     287  1729
##                                           
##                Accuracy : 0.9068          
##                  95% CI : (0.9009, 0.9125)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7321          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9613          
##             Specificity : 0.7351          
##          Pos Pred Value : 0.9196          
##          Neg Pred Value : 0.8576          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7298          
##    Detection Prevalence : 0.7936          
##       Balanced Accuracy : 0.8482          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the same confusion-matrix object again; the output that follows
# repeats the statistics already shown for it.
ad_tda_kde_5.40.5_n2_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7129   623
##      >50K     287  1729
##                                           
##                Accuracy : 0.9068          
##                  95% CI : (0.9009, 0.9125)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7321          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9613          
##             Specificity : 0.7351          
##          Pos Pred Value : 0.9196          
##          Neg Pred Value : 0.8576          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7298          
##    Detection Prevalence : 0.7936          
##       Balanced Accuracy : 0.8482          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-2 test-set confusion matrix.
ad_tda_kde_5.40.5_n2_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.068387e-01   7.321284e-01   9.009014e-01   9.125320e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.139479e-310   1.184291e-28

# Store the test-set accuracy (first element of `overall`) as a named value.
ad_tda_kde_5.40.5_n2_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_rf_cf0[["overall"]]["Accuracy"]

# Per-class statistics of the same confusion matrix.
ad_tda_kde_5.40.5_n2_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9612999            0.7351190            0.9196336 
##       Neg Pred Value            Precision               Recall 
##            0.8576389            0.9196336            0.9612999 
##                   F1           Prevalence       Detection Rate 
##            0.9400053            0.7592138            0.7298321 
## Detection Prevalence    Balanced Accuracy 
##            0.7936118            0.8482095

# Store precision, recall and F1 (elements 5 to 7 of `byClass`).
ad_tda_kde_5.40.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### 3-fold difference

# Row-wise difference of the per-fold accuracies: ad_rf_fit_re minus the
# node-2 fit. Positive values mean ad_rf_fit_re was higher in that row.
diff_tda_kde_5.40.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_KDE_5.40.5_n2_rf_fit0_re
diff_tda_kde_5.40.5_rf_n2_3_fold
##      Accuracy
## 1 0.024315972
## 2 0.009936808
## 3 0.000192380
## Bayesian tests on the n2 3-fold differences

# Bayesian sign test.
# NOTE(review): -0.01 and 0.01 are presumably the ROPE limits (the result
# carries a probRope element) -- confirm against BayesianSignTest().
bst_tda_kde_5.40.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Left-vs-right odds from the Bayesian sign test (probLeft / probRight).
# The ratio is 0 when probLeft is 0 and probRight is positive.
bst_tda_kde_5.40.5_rf.n2_3_fold_odds.left <-
  bst_tda_kde_5.40.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n2_3_fold[["probRight"]]
bst_tda_kde_5.40.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the n2 3-fold differences.
# NOTE(review): -0.01 and 0.01 are presumably the ROPE limits (the result
# carries a winRope element) -- confirm against BayesianSignedRank().
bsr_tda_kde_5.40.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.5785
## 
## $winRight
## [1] 0.4215
# Correlated Bayesian t-test on the n2 3-fold differences.
# NOTE(review): 0.1 is presumably the correlation between folds and
# (-0.01, 0.01) the ROPE limits -- confirm against the definition of
# correlatedBayesianTtest().
bct_tda_kde_5.40.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n2_3_fold
## $left
## [1] 0.05869393
## 
## $rope
## [1] 0.3770915
## 
## $right
## [1] 0.5642146
# ROPE plot of the n2 3-fold differences using the range c(-0.01, 0.01).
plot(
  rope(diff_tda_kde_5.40.5_rf_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
# bf_tda_kde_5.40.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold))
# bf_tda_kde_5.40.5_rf.n2_3_fold

# One-sample t-test of the n2 3-fold differences (default null: mean = 0).
t.test(as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold)
## t = 1.6387, df = 2, p-value = 0.2429
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01866521  0.04162865
## sample estimates:
##  mean of x 
## 0.01148172
### Test-set difference: rf_cf_ov_acc minus the n2 TDA-assisted accuracy.
### A negative value means the n2 model scored higher on the test set.
diff_tda_kde_5.40.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n2_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n2_test
##    Accuracy 
## -0.04791155
## Bayesian tests on the n2 test-set difference

# Bayesian sign test.
# NOTE(review): -0.01 and 0.01 are presumably the ROPE limits (the result
# carries a probRope element) -- confirm against BayesianSignTest().
bst_tda_kde_5.40.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n2_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Left-vs-right odds from the Bayesian sign test (probLeft / probRight).
# The ratio evaluates to Inf whenever probRight is 0.
bst_tda_kde_5.40.5_rf.n2_test_odds.left <-
  bst_tda_kde_5.40.5_rf.n2_test[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n2_test[["probRight"]]
bst_tda_kde_5.40.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the n2 test-set difference.
# NOTE(review): -0.01 and 0.01 are presumably the ROPE limits (the result
# carries a winRope element) -- confirm against BayesianSignedRank().
bsr_tda_kde_5.40.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n2_test
## $winLeft
## [1] 0.8406333
## 
## $winRope
## [1] 0.1593667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the n2 test-set difference.
# NOTE(review): the recorded left/rope/right results are all NA. The input
# holds a single value, so presumably no variance can be estimated -- confirm
# whether this test is meaningful for the test-set comparison.
bct_tda_kde_5.40.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n2_test))

#BayesFactor
#bf_tda_kde_5.40.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n2_test))
#bf_tda_kde_5.40.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n2_test))

## Node3

# Fit the TDA-assisted random forest for node 3: income class (adult_df1)
# against all other columns of the node-3 data, with predictors centred and
# scaled, tuned over rfGrid and resampled according to fitControl.
#
# `importance = TRUE` is forwarded to randomForest(). The earlier spelling
# `Importance = T` did not match randomForest's lower-case argument and was
# silently ignored (the recorded summary shows `importance` of length 108 and
# `importanceSD` of length 0), so permutation importance was never computed.
# Output recorded below this call predates the change.
#
# NOTE(review): the recorded fit reports 108 predictors while rfGrid tries
# mtry values from 50 to 1000; randomForest warns "invalid mtry: reset to
# within valid range" for those, so most grid points presumably fit the same
# model. Consider capping the grid at the number of predictors.
Adult_TDA_KDE_5.40.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
# Show the tuning results and the selected mtry for the node-3 fit
print(Adult_TDA_KDE_5.40.5_n3_RfFit0)
## Random Forest 
## 
## 10351 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6901, 6901, 6900 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8282292  0.5644916
##    100  0.8256209  0.5577431
##    150  0.8257176  0.5582549
##    200  0.8263937  0.5600762
##    250  0.8269736  0.5618642
##    300  0.8262973  0.5598900
##    350  0.8267804  0.5605272
##    400  0.8249448  0.5561782
##    450  0.8256208  0.5577533
##    500  0.8254278  0.5574561
##    550  0.8250415  0.5564941
##    600  0.8257177  0.5579884
##    650  0.8268768  0.5608959
##    700  0.8265870  0.5604204
##    750  0.8252347  0.5570562
##    800  0.8268766  0.5615544
##    850  0.8258142  0.5580689
##    900  0.8262972  0.5598700
##    950  0.8273599  0.5617098
##   1000  0.8251382  0.5572343
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling metrics of the selected node-3 model
Adult_TDA_KDE_5.40.5_n3_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8304348 0.5733312    Fold1
## 2 0.8281657 0.5637203    Fold3
## 3 0.8260870 0.5564232    Fold2
# Keep only the first column (Accuracy) as a one-column data frame.
# NOTE(review): rows are in the order printed above (Fold1, Fold3, Fold2) and
# are later subtracted positionally from the reference RF's per-fold
# accuracies -- verify that both objects list the folds in the same order.
ad_tda_kde_5.40.5_n3_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n3_RfFit0[["resample"]][1]


# Structural summary of the fitted object's final model
summary(Adult_TDA_KDE_5.40.5_n3_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       10351  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           20702  matrix     numeric  
## oob.times       10351  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               10351  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 highest-ranked predictors for the node-3
# fit. The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_KDE_5.40.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n3_RfFit TDA-Assisted RF")

# Predict the outcome for the test data with the node-3 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of predictions against the observed test labels, used to
# assess model performance on the test data
ad_tda_kde_5.40.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7074   600
##      >50K     342  1752
##                                           
##                Accuracy : 0.9036          
##                  95% CI : (0.8975, 0.9093)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.726           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9539          
##             Specificity : 0.7449          
##          Pos Pred Value : 0.9218          
##          Neg Pred Value : 0.8367          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7242          
##    Detection Prevalence : 0.7856          
##       Balanced Accuracy : 0.8494          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion matrix (repeats the output shown above)
print(ad_tda_kde_5.40.5_n3_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7074   600
##      >50K     342  1752
##                                           
##                Accuracy : 0.9036          
##                  95% CI : (0.8975, 0.9093)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.726           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9539          
##             Specificity : 0.7449          
##          Pos Pred Value : 0.9218          
##          Neg Pred Value : 0.8367          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7242          
##    Detection Prevalence : 0.7856          
##       Balanced Accuracy : 0.8494          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 confusion matrix
ad_tda_kde_5.40.5_n3_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.035627e-01   7.259706e-01   8.975366e-01   9.093468e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  2.867234e-295   5.592576e-17
# Keep the overall test accuracy (first element, named "Accuracy")
ad_tda_kde_5.40.5_n3_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics
ad_tda_kde_5.40.5_n3_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9538835            0.7448980            0.9218139 
##       Neg Pred Value            Precision               Recall 
##            0.8366762            0.9218139            0.9538835 
##                   F1           Prevalence       Detection Rate 
##            0.9375746            0.7592138            0.7242015 
## Detection Prevalence    Balanced Accuracy 
##            0.7856265            0.8493907
# Elements 5 to 7 of byClass, i.e. Precision, Recall and F1 as printed above
ad_tda_kde_5.40.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### 3-fold difference

# Positional (row-by-row) difference of the per-fold accuracies:
# reference RF minus node-3 TDA-assisted RF
diff_tda_kde_5.40.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n3_rf_fit0_re
print(diff_tda_kde_5.40.5_rf_n3_3_fold)
##     Accuracy
## 1 0.03319560
## 2 0.02942835
## 3 0.02440001
## Bayesian tests on the 3-fold differences

# Bayesian sign test, ROPE bounds [-0.01, 0.01]

bst_tda_kde_5.40.5_rf.n3_3_fold <- diff_tda_kde_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_kde_5.40.5_rf.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight)
bst_tda_kde_5.40.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the 3-fold differences, ROPE bounds [-0.01, 0.01]

bsr_tda_kde_5.40.5_rf.n3_3_fold <- diff_tda_kde_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_kde_5.40.5_rf.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0084
## 
## $winRight
## [1] 0.9916
# Bayesian correlated t-test on the per-fold accuracy differences (node 3).
# Positional arguments: differences, rho, ROPE lower bound, ROPE upper bound.
# rho models the correlation between cross-validation resamples; the usual
# heuristic (Nadeau & Bengio) is rho = n_test / (n_test + n_train) = 1 / k.
# The fit above uses 3-fold CV, so rho is 1/3; the previous value of 0.1
# corresponds to 10-fold CV and understates the posterior variance.
# NOTE: the output recorded below this call was produced with rho = 0.1 and
# must be regenerated.

bct_tda_kde_5.40.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n3_3_fold
## $left
## [1] 0.002819873
## 
## $rope
## [1] 0.008743332
## 
## $right
## [1] 0.9884368
# ROPE plot for the fold-wise differences, ROPE bounds [-0.01, 0.01]
diff_tda_kde_5.40.5_rf_n3_3_fold |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor (disabled)
# bf_tda_kde_5.40.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold))
# bf_tda_kde_5.40.5_rf.n3_3_fold

# Frequentist one-sample t-test of the fold-wise differences (H0: mean = 0)
diff_tda_kde_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  t.test()
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold)
## t = 11.386, df = 2, p-value = 0.007626
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01804590 0.03997008
## sample estimates:
##  mean of x 
## 0.02900799
### Test-set accuracy difference (baseline RF minus TDA-assisted RF, node 3)
diff_tda_kde_5.40.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n3_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n3_test
##    Accuracy 
## -0.04463554
## Bayesian tests on the test-set difference

# Bayesian sign test; note the input here is a single difference value.

bst_tda_kde_5.40.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right": probLeft / probRight.
# The result is Inf when probRight is 0, as in the recorded output below.

bst_tda_kde_5.40.5_rf.n3_test_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, same ROPE bounds.

bsr_tda_kde_5.40.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n3_test
## $winLeft
## [1] 0.8372667
## 
## $winRope
## [1] 0.1627333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# The input is a single value and the recorded result is NA for left, rope
# and right, so this call yields no usable probabilities.

bct_tda_kde_5.40.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n3_test))

#BayesFactor
#bf_tda_kde_5.40.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n3_test)) #bf_tda_kde_5.40.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n3_test))

##Node4

# Fit the TDA-assisted random forest for node 4 with caret, tuning mtry over
# rfGrid under the resampling scheme in fitControl; predictors are centred and
# scaled, and the model is selected on accuracy.
#
# `importance = TRUE` is forwarded to randomForest(). It was previously spelled
# `Importance = T`, which does not match randomForest's (case-sensitive)
# `importance` argument and was therefore silently ignored.
#
# NOTE(review): the recorded warnings show randomForest resetting mtry values
# from rfGrid that are out of range for this data, and preProcess reporting
# zero-variance dummy columns; consider capping the grid and/or adding "zv".
Adult_TDA_KDE_5.40.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  importance = TRUE,
  method = 'rf',
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
# Display the tuning results of the node-4 fit (accuracy/kappa per mtry).
Adult_TDA_KDE_5.40.5_n4_RfFit0
## Random Forest 
## 
## 8741 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5828, 5827, 5827 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8557372  0.5207916
##    100  0.8533348  0.5155632
##    150  0.8528771  0.5153671
##    200  0.8523052  0.5125524
##    250  0.8520763  0.5117217
##    300  0.8519622  0.5103528
##    350  0.8542499  0.5181504
##    400  0.8515043  0.5103713
##    450  0.8529916  0.5143107
##    500  0.8529915  0.5145521
##    550  0.8527628  0.5155377
##    600  0.8527627  0.5150852
##    650  0.8525339  0.5122389
##    700  0.8519620  0.5105925
##    750  0.8519620  0.5114700
##    800  0.8517330  0.5087312
##    850  0.8512755  0.5100794
##    900  0.8526485  0.5148234
##    950  0.8531060  0.5161048
##   1000  0.8517331  0.5098221
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa for the selected mtry.
Adult_TDA_KDE_5.40.5_n4_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8547889 0.5179049    Fold1
## 2 0.8544955 0.5191210    Fold3
## 3 0.8579272 0.5253489    Fold2
# Keep only the Accuracy column (as a one-column data frame) for the paired
# comparison against the baseline RF.
# NOTE(review): rows are stored as Fold1, Fold3, Fold2; the later subtraction
# pairs rows by position, so confirm the baseline uses the same fold order.
ad_tda_kde_5.40.5_n4_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n4_RfFit0[["resample"]]["Accuracy"]


# Component overview of the fitted object.
summary(Adult_TDA_KDE_5.40.5_n4_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted        8741  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           17482  matrix     numeric  
## oob.times        8741  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y                8741  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 top-ranked predictors for the node-4 fit.
# (Title typo "Assited" corrected to "Assisted".)
vip(Adult_TDA_KDE_5.40.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n4_RfFit TDA-Assisted RF")

# Predict the outcome for the test data with the node-4 model.
# Note: pred0 is a plain global, overwritten by each section that predicts.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels.
ad_tda_kde_5.40.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6857   684
##      >50K     559  1668
##                                          
##                Accuracy : 0.8727         
##                  95% CI : (0.866, 0.8793)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.6455         
##                                          
##  Mcnemar's Test P-Value : 0.0004363      
##                                          
##             Sensitivity : 0.9246         
##             Specificity : 0.7092         
##          Pos Pred Value : 0.9093         
##          Neg Pred Value : 0.7490         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7020         
##    Detection Prevalence : 0.7720         
##       Balanced Accuracy : 0.8169         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second display of the same confusion-matrix object; the output that follows
# repeats the one printed immediately after the object was created.
ad_tda_kde_5.40.5_n4_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6857   684
##      >50K     559  1668
##                                          
##                Accuracy : 0.8727         
##                  95% CI : (0.866, 0.8793)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.6455         
##                                          
##  Mcnemar's Test P-Value : 0.0004363      
##                                          
##             Sensitivity : 0.9246         
##             Specificity : 0.7092         
##          Pos Pred Value : 0.9093         
##          Neg Pred Value : 0.7490         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7020         
##    Detection Prevalence : 0.7720         
##       Balanced Accuracy : 0.8169         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the node-4 confusion matrix.
ad_tda_kde_5.40.5_n4_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.727477e-01   6.455193e-01   8.659765e-01   8.792959e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.430507e-174   4.362707e-04
# Keep the test-set accuracy (first entry of `overall`) as a named value.
ad_tda_kde_5.40.5_n4_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.40.5_n4_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9246224            0.7091837            0.9092958 
##       Neg Pred Value            Precision               Recall 
##            0.7489897            0.9092958            0.9246224 
##                   F1           Prevalence       Detection Rate 
##            0.9168951            0.7592138            0.7019861 
## Detection Prevalence    Balanced Accuracy 
##            0.7720106            0.8169031
# Store precision, recall and F1 (entries 5-7 of byClass) for the node-4 model.
ad_tda_kde_5.40.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Initial Bayesian comparison: non-TDA-assisted RF vs. TDA-assisted RF (node 4)

### Per-fold accuracy difference (3-fold)

# Row-wise difference of the resampled accuracies: baseline RF minus
# TDA-assisted RF.
diff_tda_kde_5.40.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n4_rf_fit0_re
diff_tda_kde_5.40.5_rf_n4_3_fold
##       Accuracy
## 1  0.008841503
## 2  0.003098565
## 3 -0.007440278
## Bayesian tests on the 3-fold differences

# Bayesian sign test; -0.01 / 0.01 are the lower / upper ROPE bounds.

bst_tda_kde_5.40.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right": probLeft / probRight.
# Both probabilities are 0 in the recorded output above, hence the NaN below.

bst_tda_kde_5.40.5_rf.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n4_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n4_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the same differences, same ROPE bounds.

bsr_tda_kde_5.40.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold differences.
# NOTE(review): the second argument (0.1) is presumably the correlation rho,
# commonly set to 1/k for k-fold CV; these differences come from 3 folds, so
# confirm that 0.1 (rather than 1/3) is intended.

bct_tda_kde_5.40.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n4_3_fold
## $left
## [1] 0.08596089
## 
## $rope
## [1] 0.7827507
## 
## $right
## [1] 0.1312884
# Rope Plot
# ROPE plot of the 3-fold differences, using the same [-0.01, 0.01] range.
plot(rope(diff_tda_kde_5.40.5_rf_n4_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold))
#bf_tda_kde_5.40.5_rf.n4_3_fold

#t_test
# One-sample t-test of the 3-fold differences against zero. It belongs to the
# 3-fold section and therefore runs before the test-set comparison starts
# (it was previously interleaved with the test-set odds computation).
t.test(as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold)
## t = 0.31461, df = 2, p-value = 0.7828
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01901352  0.02201338
## sample estimates:
##  mean of x 
## 0.00149993
### Test set diff
# Test-set accuracy of the baseline RF minus that of the node-4 TDA-assisted RF.
diff_tda_kde_5.40.5_rf.n4_test <- (rf_cf_ov_acc - ad_tda_kde_5.40.5_n4_rf_cf0_ov_acc)
diff_tda_kde_5.40.5_rf.n4_test
##    Accuracy 
## -0.01382064
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_rf.n4_test <- BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_rf.n4_test), -0.01, 0.01)
bst_tda_kde_5.40.5_rf.n4_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# probLeft / probRight; Inf here because probRight is 0.
bst_tda_kde_5.40.5_rf.n4_test_odds.left <- bst_tda_kde_5.40.5_rf.n4_test$probLeft / bst_tda_kde_5.40.5_rf.n4_test$probRight
bst_tda_kde_5.40.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, called with the same
# -0.01 / 0.01 limits as the sign test.
bsr_tda_kde_5.40.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n4_test
## $winLeft
## [1] 0.5420333
## 
## $winRope
## [1] 0.4579667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# The input here is a single value, and the printed result is NA for all
# three components.
bct_tda_kde_5.40.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n4_test))

#BayesFactor
#bf_tda_kde_5.40.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n4_test))
#bf_tda_kde_5.40.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n4_test))

## Node 5

# Tune and fit a random forest on the node-5 TDA/KDE training data with
# caret::train().
#   response   : adult_df1, coerced to a factor
#   predictors : every other column, centred and scaled before fitting
#   resampling : fitControl (defined elsewhere in this file)
#   tuning     : rfGrid (defined elsewhere in this file), selected on Accuracy
# `importance = TRUE` is forwarded through train() to randomForest() so that
# importance measures are computed. Argument names are case-sensitive, so the
# capitalised spelling `Importance` does not match randomForest's argument.
# `preProcess` is spelled out in full rather than relying on partial matching.
# NOTE(review): the "invalid mtry: reset to within valid range" warnings
# suggest rfGrid requests mtry values above the number of predictors --
# consider capping the grid where rfGrid is defined.
Adult_TDA_KDE_5.40.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n5.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
# Display the fitted caret object (resampling setup and per-mtry tuning results)
Adult_TDA_KDE_5.40.5_n5_RfFit0
## Random Forest 
## 
## 6628 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4418, 4419, 4419 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8735676  0.4123904
##    100  0.8690414  0.3973959
##    150  0.8707014  0.4083172
##    200  0.8705502  0.4076269
##    250  0.8731153  0.4191783
##    300  0.8707012  0.4079601
##    350  0.8696450  0.4025312
##    400  0.8707014  0.4041953
##    450  0.8711541  0.4068876
##    500  0.8714556  0.4118756
##    550  0.8707008  0.4057698
##    600  0.8713050  0.4069658
##    650  0.8702485  0.4035178
##    700  0.8714559  0.4113993
##    750  0.8707014  0.4058444
##    800  0.8719084  0.4115080
##    850  0.8688906  0.4002628
##    900  0.8707010  0.4011834
##    950  0.8714557  0.4096678
##   1000  0.8691925  0.4013203
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa recorded for the node-5 fit
Adult_TDA_KDE_5.40.5_n5_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8674208 0.3928947    Fold1
## 2 0.8750566 0.4271266    Fold3
## 3 0.8782254 0.4171498    Fold2
# Keep only the first column (Accuracy) as a one-column data frame.
# NOTE(review): the rows are listed as Fold1, Fold3, Fold2; any row-wise
# pairing with another model's resamples should confirm the fold order matches.
ad_tda_kde_5.40.5_n5_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n5_RfFit0[["resample"]][, 1, drop = FALSE]


# List the components (length, class, mode) reported by summary() for the fit
summary(object = Adult_TDA_KDE_5.40.5_n5_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted        6628  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           13256  matrix     numeric  
## oob.times        6628  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y                6628  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the node-5 TDA-assisted random forest, limited
# to 25 features; the feature count is passed by name.
vip(Adult_TDA_KDE_5.40.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n5_RfFit TDA-Assisted RF")

# Apply the node-5 model (fitted on the training data) to the test data
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the test labels, to assess
# model performance on the test data; then display it
ad_tda_kde_5.40.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6836   896
##      >50K     580  1456
##                                           
##                Accuracy : 0.8489          
##                  95% CI : (0.8416, 0.8559)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5668          
##                                           
##  Mcnemar's Test P-Value : 2.421e-16       
##                                           
##             Sensitivity : 0.9218          
##             Specificity : 0.6190          
##          Pos Pred Value : 0.8841          
##          Neg Pred Value : 0.7151          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6998          
##    Detection Prevalence : 0.7916          
##       Balanced Accuracy : 0.7704          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time (it is already
# printed right after it is created); this repeated print looks redundant.
ad_tda_kde_5.40.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6836   896
##      >50K     580  1456
##                                           
##                Accuracy : 0.8489          
##                  95% CI : (0.8416, 0.8559)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5668          
##                                           
##  Mcnemar's Test P-Value : 2.421e-16       
##                                           
##             Sensitivity : 0.9218          
##             Specificity : 0.6190          
##          Pos Pred Value : 0.8841          
##          Neg Pred Value : 0.7151          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6998          
##    Detection Prevalence : 0.7916          
##       Balanced Accuracy : 0.7704          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the TDA-assisted RF confusion matrix.
ad_tda_kde_5.40.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.488943e-01   5.668402e-01   8.416367e-01   8.559433e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.008218e-105   2.421481e-16
# Keep the accuracy entry (first element of `overall`) as a named length-one
# vector for the test-set comparison.
ad_tda_kde_5.40.5_n5_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_rf_cf0$overall["Accuracy"]

# Per-class statistics (the positive class is ' <=50K').
ad_tda_kde_5.40.5_n5_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9217907            0.6190476            0.8841180 
##       Neg Pred Value            Precision               Recall 
##            0.7151277            0.8841180            0.9217907 
##                   F1           Prevalence       Detection Rate 
##            0.9025614            0.7592138            0.6998362 
## Detection Prevalence    Balanced Accuracy 
##            0.7915643            0.7704192
# Precision, recall and F1 are elements 5 to 7 of `byClass`.
ad_tda_kde_5.40.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF

### Difference over the 3 cross-validation folds

# Non-TDA accuracy minus TDA-assisted accuracy, one row per resample.
# NOTE(review): the two tables are subtracted row by row, i.e. paired by
# position rather than by fold label; confirm both list the folds in the
# same order.
diff_tda_kde_5.40.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n5_rf_fit0_re
diff_tda_kde_5.40.5_rf_n5_3_fold
##       Accuracy
## 1 -0.003790434
## 2 -0.017462483
## 3 -0.027738471
## Bayesian tests on the 3-fold differences

# Bayesian sign test. -0.01 and 0.01 are presumably the lower/upper bounds of
# the region of practical equivalence (the result reports probLeft, probRope
# and probRight) - confirm against the helper's definition.
bst_tda_kde_5.40.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n5_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the Bayesian sign test.
# NOTE(review): probRight is 0 for this comparison, so the ratio is Inf.
bst_tda_kde_5.40.5_rf.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise differences, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.40.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n5_3_fold
## $winLeft
## [1] 0.7847667
## 
## $winRope
## [1] 0.2152333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the correlation
# between folds; the usual 1/k heuristic would give 1/3 for the 3 folds used
# here - confirm that 0.1 is intended.
bct_tda_kde_5.40.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n5_3_fold
## $left
## [1] 0.7439305
## 
## $rope
## [1] 0.2153731
## 
## $right
## [1] 0.04069636
# ROPE plot of the fold-wise differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.40.5_rf_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold))
#bf_tda_kde_5.40.5_rf.n5_3_fold

# One-sample t-test of the differences against a mean of 0. The argument
# expression is kept as is because it is echoed in the printed "data:" label.
t.test(as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold)
## t = -2.3543, df = 2, p-value = 0.1428
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.04617510  0.01351418
## sample estimates:
##   mean of x 
## -0.01633046
### Difference on the held-out test set

# Non-TDA RF test accuracy minus TDA-assisted RF test accuracy (single value).
diff_tda_kde_5.40.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n5_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n5_test
##   Accuracy 
## 0.01003276
## Bayesian tests on the test-set difference

# Bayesian sign test on the single test-set difference, called with -0.01 and
# 0.01 as the second and third arguments.
bst_tda_kde_5.40.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the test-set Bayesian sign test
# (probLeft is 0 here, so the ratio is 0).
bst_tda_kde_5.40.5_rf.n5_test_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.40.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4589333
## 
## $winRight
## [1] 0.5410667
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): all three returned probabilities are NA for this input,
# presumably because one difference gives no variance estimate - confirm
# whether this test should be run on the test-set difference at all.
bct_tda_kde_5.40.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_kde_5.40.5_rf.n5_test))

# BayesFactor (disabled)
#bf_tda_kde_5.40.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n5_test))
#bf_tda_kde_5.40.5_rf.n5_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n5_test))

## Non-TDA-assisted baseline

# Tuning grid for the radial-basis SVM: three sigma values crossed with the
# five cost values C = 0.25, 0.50, ..., 1.25 (15 combinations).
svmGrid <- expand.grid(sigma = c(0.1, 1, 10), C = (1:5) * 0.25)

# Support Vector Machine - Radial Basis, tuned over svmGrid on the one-hot
# encoded training data with centring and scaling, selecting by accuracy.
# Changes from the previous version: `TRUE` replaces the reassignable `T`, and
# `preProcess` is spelled out instead of relying on partial matching of
# `preProc`.
# NOTE(review): `Importance` is not a named argument of caret::train(); it is
# forwarded through `...` - confirm it has any effect for method "svmRadial".
# NOTE(review): this fit warns that V14.Holand.Netherlands has zero variance;
# consider dropping the column or adding "zv" to preProcess.
adultSvmFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary of the baseline SVM: resampled accuracy and kappa
# for every sigma / C combination and the finally selected values.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15195, 15196 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa       
##    0.1   0.25  0.8074847  0.3500101868
##    0.1   0.50  0.8165665  0.4055675167
##    0.1   0.75  0.8211732  0.4325583188
##    0.1   1.00  0.8231914  0.4476493440
##    0.1   1.25  0.8238056  0.4561167589
##    1.0   0.25  0.7781336  0.1411353617
##    1.0   0.50  0.7882245  0.2207762591
##    1.0   0.75  0.7932698  0.2634153629
##    1.0   1.00  0.7972622  0.2991704557
##    1.0   1.25  0.7980081  0.3157845092
##   10.0   0.25  0.7592243  0.0002766655
##   10.0   0.50  0.7604089  0.0153264999
##   10.0   0.75  0.7637871  0.0462168622
##   10.0   1.00  0.7667705  0.0813937482
##   10.0   1.25  0.7680867  0.1017111305
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling results of the selected baseline SVM.
# NOTE(review): the rows are not listed in fold order (Fold1, Fold3, Fold2);
# keep this in mind if the table is paired row by row with another model.
adultSvmFit$resample
##    Accuracy     Kappa Resample
## 1 0.8239010 0.4624931    Fold1
## 2 0.8253258 0.4597737    Fold3
## 3 0.8221901 0.4460835    Fold2
# Keep only the first column (Accuracy) as a one-column data frame.
ad_svm_fit_re <- adultSvmFit[["resample"]][1]

summary(adultSvmFit)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(adultSvmFit, 25) + ggtitle("non-TDA-Assisted Svm")

# Predict the outcome for the test data with the model fitted on training data.
predictions <- predict(adultSvmFit, newdata = adult.one_hot_df4Test)

# Confusion matrix of predicted vs. observed labels on the test data.
svm_cf <- confusionMatrix(
  data = predictions,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
svm_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6941  1215
##      >50K     475  1137
##                                           
##                Accuracy : 0.827           
##                  95% CI : (0.8193, 0.8344)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4698          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9359          
##             Specificity : 0.4834          
##          Pos Pred Value : 0.8510          
##          Neg Pred Value : 0.7053          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7106          
##    Detection Prevalence : 0.8350          
##       Balanced Accuracy : 0.7097          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the baseline SVM confusion matrix.
svm_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.269861e-01   4.698380e-01   8.193364e-01   8.344402e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.336799e-59   2.986040e-72
# Keep the accuracy entry (first element of `overall`) as a named length-one
# vector.
svm_cf_ov_acc <- svm_cf$overall["Accuracy"]

# Per-class statistics (the positive class is ' <=50K').
svm_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9359493            0.4834184            0.8510299 
##       Neg Pred Value            Precision               Recall 
##            0.7053350            0.8510299            0.9359493 
##                   F1           Prevalence       Detection Rate 
##            0.8914719            0.7592138            0.7105856 
## Detection Prevalence    Balanced Accuracy 
##            0.8349713            0.7096838
# Precision, recall and F1 are elements 5 to 7 of `byClass`.
svm_cf_pr_rec_f1 <- svm_cf$byClass[c("Precision", "Recall", "F1")]

## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node 1
# NOTE(review): the object names use "5.40.5" while the header says 50%
# overlap, and the model is labelled node 1 (n1) but is trained on
# tda.m_adult_5.40.5.n2.vec - confirm which overlap and node are intended.

# Radial-basis SVM on the TDA-derived subset, using the same grid, resampling
# control and centring/scaling as the baseline SVM.
# Changes from the previous version: `TRUE` replaces the reassignable `T`, and
# `preProcess` is spelled out instead of relying on partial matching of
# `preProc`.
# NOTE(review): `Importance` is not a named argument of caret::train(); it is
# forwarded through `...` - confirm it has any effect for method "svmRadial".
Adult_TDA_PC_5.40.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary of the TDA-assisted SVM: resampled accuracy and
# kappa for every sigma / C combination and the finally selected values.
Adult_TDA_PC_5.40.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6851, 6851, 6850 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.6835340  0.27566297
##    0.1   0.50  0.6937518  0.31603664
##    0.1   0.75  0.6959906  0.32800238
##    0.1   1.00  0.6956015  0.33313031
##    0.1   1.25  0.6946285  0.33439397
##    1.0   0.25  0.6414947  0.12102319
##    1.0   0.50  0.6639735  0.20347703
##    1.0   0.75  0.6703967  0.23720347
##    1.0   1.00  0.6742897  0.25948107
##    1.0   1.25  0.6742895  0.26935552
##   10.0   0.25  0.6145387  0.01539684
##   10.0   0.50  0.6220319  0.04604963
##   10.0   0.75  0.6260217  0.07144294
##   10.0   1.00  0.6322499  0.10247763
##   10.0   1.25  0.6360453  0.12540423
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 0.75.
Adult_TDA_PC_5.40.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.6919708 0.3151232    Fold1
## 2 0.6995620 0.3403526    Fold2
## 3 0.6964390 0.3285313    Fold3
ad_tda_pc_5.40.5_n1_svm_fit_re<-Adult_TDA_PC_5.40.5_n1_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.40.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.40.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n1_SvmFit TDA-Assisted Svm")

# Score the held-out test set with the node-1 SVM fitted above.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the true test labels.
# NOTE(review): the printed test accuracy (0.3076) is far below both the
# cross-validated accuracy (~0.70) and the no-information rate (0.7592), with
# most rows predicted as ' >50K'. Presumably the test data and the node
# training data differ in columns or preprocessing -- confirm before relying on
# any comparison built from this matrix.
ad_tda_pc_5.40.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    920   267
##      >50K    6496  2085
##                                           
##                Accuracy : 0.3076          
##                  95% CI : (0.2985, 0.3169)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0055          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.12406         
##             Specificity : 0.88648         
##          Pos Pred Value : 0.77506         
##          Neg Pred Value : 0.24298         
##              Prevalence : 0.75921         
##          Detection Rate : 0.09419         
##    Detection Prevalence : 0.12152         
##       Balanced Accuracy : 0.50527         
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the output is
# identical to the printout directly above.
ad_tda_pc_5.40.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    920   267
##      >50K    6496  2085
##                                           
##                Accuracy : 0.3076          
##                  95% CI : (0.2985, 0.3169)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0055          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.12406         
##             Specificity : 0.88648         
##          Pos Pred Value : 0.77506         
##          Neg Pred Value : 0.24298         
##              Prevalence : 0.75921         
##          Detection Rate : 0.09419         
##    Detection Prevalence : 0.12152         
##       Balanced Accuracy : 0.50527         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, CI bounds, p-values).
ad_tda_pc_5.40.5_n1_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.307637183    0.005532804    0.298492123    0.316897065    0.759213759 
## AccuracyPValue  McnemarPValue 
##    1.000000000    0.000000000
# Keep the overall test accuracy (a named length-1 vector) for the test-set
# comparison further down.
ad_tda_pc_5.40.5_n1_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n1_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics; the positive class is ' <=50K'.
ad_tda_pc_5.40.5_n1_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##           0.12405609           0.88647959           0.77506318 
##       Neg Pred Value            Precision               Recall 
##           0.24297867           0.77506318           0.12405609 
##                   F1           Prevalence       Detection Rate 
##           0.21387888           0.75921376           0.09418509 
## Detection Prevalence    Balanced Accuracy 
##           0.12151925           0.50526784
# Keep Precision, Recall and F1 (positions 5-7 of the byClass vector printed
# above), selected by name.
ad_tda_pc_5.40.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_svm_fit_re` minus the node-1 TDA-assisted
# SVM, so a positive value means the TDA-assisted model scored lower.
# NOTE(review): `ad_svm_fit_re` is defined elsewhere -- presumably the fold
# accuracies of the non-TDA SVM. The subtraction pairs rows by position, not by
# fold label; confirm both frames list the folds in the same order.
diff_tda_pca_5.40.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n1_svm_fit_re
diff_tda_pca_5.40.5_svm_n1_3_fold
##    Accuracy
## 1 0.1319302
## 2 0.1257637
## 3 0.1257511
## Bayesian Tests 3-fold diff

# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)

bst_tda_pca_5.40.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold), -0.01, 0.01
)
bst_tda_pca_5.40.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: ratio of the left to the right probability

bst_tda_pca_5.40.5_svm.n1_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n1_3_fold[["probLeft"]] /
    bst_tda_pca_5.40.5_svm.n1_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.40.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.40.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009833333
## 
## $winRight
## [1] 0.9901667
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho. For k-fold CV it is commonly set to 1/k, which would be 1/3 for the
# 3-fold resampling used here -- confirm the intended value.

bct_tda_pca_5.40.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_svm.n1_3_fold
## $left
## [1] 0.0001485419
## 
## $rope
## [1] 5.467953e-05
## 
## $right
## [1] 0.9997968
# Rope Plot
plot(rope(diff_tda_pca_5.40.5_svm_n1_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_svm.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold))
#bf_tda_pca_5.40.5_svm.n1_3_fold

# Frequentist one-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold)
## t = 62.118, df = 2, p-value = 0.0002591
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1189618 0.1366682
## sample estimates:
## mean of x 
##  0.127815
### Test set diff
# Test-set accuracy difference: `svm_cf_ov_acc` minus the node-1 TDA-assisted
# SVM's overall test accuracy.
# NOTE(review): this is a single number, so every test below runs on one
# observation. The correlated t-test returns NA here, presumably because no
# standard deviation can be computed from one value; interpret the sign and
# signed-rank results with the same caution.
diff_tda_pca_5.40.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n1_test
##  Accuracy 
## 0.5193489
## Bayesian Tests Test set diff

# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)

bst_tda_pca_5.40.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n1_test), -0.01, 0.01
)
bst_tda_pca_5.40.5_svm.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the left to the right probability

bst_tda_pca_5.40.5_svm.n1_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n1_test[["probLeft"]] /
    bst_tda_pca_5.40.5_svm.n1_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.40.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n1_test), -0.01, 0.01
)
bsr_tda_pca_5.40.5_svm.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1622667
## 
## $winRight
## [1] 0.8377333
# Bayesian Correlated Test (yields NA for this single-value input)

bct_tda_pca_5.40.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n1_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n1_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n1_test)) #bf_tda_pca_5.40.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n1_test))

##With TDA PCA filter 5 intervals, 40% overlap, 5 bins (header previously said 50%, but every object in this section is named 5.40.5 -- confirm)
##Node2

# Fit a radial-basis-function SVM (caret method "svmRadial") to the node-2
# data, predicting `adult_df1` from every other column. Predictors are centred
# and scaled; resampling and the sigma/C grid come from `fitControl` and
# `svmGrid`, which are defined elsewhere in this file. The model with the
# highest resampled Accuracy is kept.
#
# NOTE(review): `Importance` is kept as in the original call, but it does not
# look like a caret::train() or kernlab::ksvm() argument (randomForest uses
# lower-case `importance`) -- confirm whether it has any effect here.
# NOTE(review): the fit emits many "zero variances" / "Cannot scale data"
# warnings for constant dummy columns; adding "zv" to `preProcess` would drop
# those columns first, but it would also change the fitted model, so it is left
# for a deliberate re-run.
Adult_TDA_PC_5.40.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the node-2 caret fit: resampling setup, the sigma/C tuning grid with
# cross-validated Accuracy and Kappa, and the selected final parameters.
Adult_TDA_PC_5.40.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6851, 6850, 6851 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.6796422  0.26891199
##    0.1   0.50  0.6914171  0.31073716
##    0.1   0.75  0.6943368  0.32414095
##    0.1   1.00  0.6958940  0.33335086
##    0.1   1.25  0.6987162  0.34249154
##    1.0   0.25  0.6406186  0.11861340
##    1.0   0.50  0.6662127  0.20951112
##    1.0   0.75  0.6698140  0.23681241
##    1.0   1.00  0.6711760  0.25299882
##    1.0   1.25  0.6750690  0.27153478
##   10.0   0.25  0.6150252  0.01691059
##   10.0   0.50  0.6189177  0.03798253
##   10.0   0.75  0.6235885  0.06403264
##   10.0   1.00  0.6301083  0.09590707
##   10.0   1.25  0.6333205  0.11641877
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold Accuracy/Kappa of the selected (final) tuning parameters.
Adult_TDA_PC_5.40.5_n2_SvmFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.7127007 0.3696479    Fold1
## 2 0.6928467 0.3287552    Fold3
## 3 0.6906013 0.3290715    Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the reference SVM further down.
# NOTE(review): the rows above are listed as Fold1, Fold3, Fold2. The later
# subtraction pairs rows by position, so confirm the reference accuracies use
# the same row order (or sort both by `Resample` first).
ad_tda_pc_5.40.5_n2_svm_fit_re <-
  Adult_TDA_PC_5.40.5_n2_SvmFit0[["resample"]]["Accuracy"]

summary(Adult_TDA_PC_5.40.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.40.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n2_SvmFit TDA-Assisted Svm")

# Score the held-out test set with the node-2 SVM fitted above.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the true test labels.
# NOTE(review): the printed test accuracy (0.3135) is far below both the
# cross-validated accuracy (~0.70) and the no-information rate (0.7592), with
# most rows predicted as ' >50K'. Presumably the test data and the node
# training data differ in columns or preprocessing -- confirm before relying on
# any comparison built from this matrix.
ad_tda_pc_5.40.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    981   271
##      >50K    6435  2081
##                                           
##                Accuracy : 0.3135          
##                  95% CI : (0.3043, 0.3228)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.009           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1323          
##             Specificity : 0.8848          
##          Pos Pred Value : 0.7835          
##          Neg Pred Value : 0.2444          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1004          
##    Detection Prevalence : 0.1282          
##       Balanced Accuracy : 0.5085          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the output is
# identical to the printout directly above.
ad_tda_pc_5.40.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    981   271
##      >50K    6435  2081
##                                           
##                Accuracy : 0.3135          
##                  95% CI : (0.3043, 0.3228)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.009           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1323          
##             Specificity : 0.8848          
##          Pos Pred Value : 0.7835          
##          Neg Pred Value : 0.2444          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1004          
##    Detection Prevalence : 0.1282          
##       Balanced Accuracy : 0.5085          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, CI bounds, p-values).
ad_tda_pc_5.40.5_n2_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.3134726      0.0090039      0.3042784      0.3227781      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the overall test accuracy (a named length-1 vector) for the test-set
# comparison further down.
ad_tda_pc_5.40.5_n2_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n2_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics; the positive class is ' <=50K'.
ad_tda_pc_5.40.5_n2_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.1322816            0.8847789            0.7835463 
##       Neg Pred Value            Precision               Recall 
##            0.2443636            0.7835463            0.1322816 
##                   F1           Prevalence       Detection Rate 
##            0.2263498            0.7592138            0.1004300 
## Detection Prevalence    Balanced Accuracy 
##            0.1281736            0.5085302
# Keep Precision, Recall and F1 (positions 5-7 of the byClass vector printed
# above), selected by name.
ad_tda_pc_5.40.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_svm_fit_re` minus the node-2 TDA-assisted
# SVM, so a positive value means the TDA-assisted model scored lower.
# NOTE(review): `ad_svm_fit_re` is defined elsewhere -- presumably the fold
# accuracies of the non-TDA SVM. The subtraction pairs rows by position, not by
# fold label; confirm both frames list the folds in the same order.
diff_tda_pca_5.40.5_svm_n2_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n2_svm_fit_re
diff_tda_pca_5.40.5_svm_n2_3_fold
##    Accuracy
## 1 0.1112003
## 2 0.1324791
## 3 0.1315888
## Bayesian Tests 3-fold diff

# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)

bst_tda_pca_5.40.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold), -0.01, 0.01
)
bst_tda_pca_5.40.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: ratio of the left to the right probability

bst_tda_pca_5.40.5_svm.n2_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n2_3_fold[["probLeft"]] /
    bst_tda_pca_5.40.5_svm.n2_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.40.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.40.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0091
## 
## $winRight
## [1] 0.9909
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho. For k-fold CV it is commonly set to 1/k, which would be 1/3 for the
# 3-fold resampling used here -- confirm the intended value.

bct_tda_pca_5.40.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_svm.n2_3_fold
## $left
## [1] 0.001754917
## 
## $rope
## [1] 0.000658136
## 
## $right
## [1] 0.9975869
# Rope Plot
plot(rope(diff_tda_pca_5.40.5_svm_n2_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_svm.n2_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold))
#bf_tda_pca_5.40.5_svm.n2_3_fold

# Frequentist one-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold)
## t = 18, df = 2, p-value = 0.003072
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.09518898 0.15498978
## sample estimates:
## mean of x 
## 0.1250894
### Test set diff
# Test-set accuracy difference: `svm_cf_ov_acc` minus the node-2 TDA-assisted
# SVM's overall test accuracy.
# NOTE(review): this is a single number, so every test below runs on one
# observation. The correlated t-test returns NA here, presumably because no
# standard deviation can be computed from one value; interpret the sign and
# signed-rank results with the same caution.
diff_tda_pca_5.40.5_svm.n2_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n2_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n2_test
##  Accuracy 
## 0.5135135
## Bayesian Tests Test set diff

# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)

bst_tda_pca_5.40.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n2_test), -0.01, 0.01
)
bst_tda_pca_5.40.5_svm.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the left to the right probability

bst_tda_pca_5.40.5_svm.n2_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n2_test[["probLeft"]] /
    bst_tda_pca_5.40.5_svm.n2_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.40.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n2_test), -0.01, 0.01
)
bsr_tda_pca_5.40.5_svm.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1605
## 
## $winRight
## [1] 0.8395
# Bayesian Correlated Test (yields NA for this single-value input)

bct_tda_pca_5.40.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n2_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n2_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n2_test)) #bf_tda_pca_5.40.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n2_test))

##Node3

# Fit a radial-kernel SVM (caret method "svmRadial") on the node-3 TDA
# training subset, centering and scaling the predictors and selecting the
# tuning parameters from svmGrid by accuracy. fitControl and svmGrid are
# defined earlier in the file.
#
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable,
# and `preProcess` is written in full instead of relying on partial
# matching of `preProc`.
# NOTE(review): `Importance` is kept as in the original; it does not look like
# an argument used by svmRadial -- confirm whether it can be dropped.
# NOTE(review): the recorded run warns about zero-variance predictors;
# adding "zv" to preProcess would drop them, but would also change the fit.
Adult_TDA_PC_5.40.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted caret model: resampling results per (sigma, C) pair and
# the tuning values selected for the final model
Adult_TDA_PC_5.40.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11563 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7710, 7708, 7708 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa       
##    0.1   0.25  0.8126789  0.1841480591
##    0.1   0.50  0.8240082  0.2699248824
##    0.1   0.75  0.8270352  0.3061764819
##    0.1   1.00  0.8283324  0.3284044138
##    0.1   1.25  0.8270351  0.3366648606
##    1.0   0.25  0.7947764  0.0246909491
##    1.0   0.50  0.7993600  0.0632990660
##    1.0   0.75  0.8014356  0.0935180998
##    1.0   1.00  0.8024734  0.1232879248
##    1.0   1.25  0.8033378  0.1481303310
##   10.0   0.25  0.7918361  0.0006577511
##   10.0   0.50  0.7922684  0.0044211695
##   10.0   0.75  0.7927008  0.0110370893
##   10.0   1.00  0.7914033  0.0159858010
##   10.0   1.25  0.7905386  0.0212200622
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold resampling metrics of the selected node-3 model
Adult_TDA_PC_5.40.5_n3_SvmFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8318194 0.3469628    Fold1
## 2 0.8246433 0.3181618    Fold2
## 3 0.8285344 0.3200886    Fold3
# Keep only the fold-wise Accuracy column (still a one-column data frame) for
# the fold-level comparison against the baseline further down
ad_tda_pc_5.40.5_n3_svm_fit_re <-
  Adult_TDA_PC_5.40.5_n3_SvmFit0[["resample"]]["Accuracy"]

# Summary of the fitted object
summary(Adult_TDA_PC_5.40.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.40.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n3_SvmFit TDA-Assisted Svm")

# Class predictions of the node-3 model on the held-out test data
# (pred0 is reused for every node's model in this file)
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the true test labels
ad_tda_pc_5.40.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6671  1834
##      >50K     745   518
##                                           
##                Accuracy : 0.736           
##                  95% CI : (0.7271, 0.7447)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1423          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8995          
##             Specificity : 0.2202          
##          Pos Pred Value : 0.7844          
##          Neg Pred Value : 0.4101          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6829          
##    Detection Prevalence : 0.8707          
##       Balanced Accuracy : 0.5599          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time -- looks
# redundant; confirm before removing.
ad_tda_pc_5.40.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6671  1834
##      >50K     745   518
##                                           
##                Accuracy : 0.736           
##                  95% CI : (0.7271, 0.7447)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1423          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8995          
##             Specificity : 0.2202          
##          Pos Pred Value : 0.7844          
##          Neg Pred Value : 0.4101          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6829          
##    Detection Prevalence : 0.8707          
##       Balanced Accuracy : 0.5599          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 test-set confusion matrix
ad_tda_pc_5.40.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.359746e-01   1.422701e-01   7.271121e-01   7.446962e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  7.960558e-102
# Test-set accuracy, selected by name rather than by position so the
# extraction does not depend on the ordering of `overall`
ad_tda_pc_5.40.5_n3_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_svm_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix
ad_tda_pc_5.40.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8995415            0.2202381            0.7843621 
##       Neg Pred Value            Precision               Recall 
##            0.4101346            0.7843621            0.8995415 
##                   F1           Prevalence       Detection Rate 
##            0.8380127            0.7592138            0.6829443 
## Detection Prevalence    Balanced Accuracy 
##            0.8707002            0.5598898
# Precision, recall and F1 (positions 5:7 of byClass in the output above),
# selected by name
ad_tda_pc_5.40.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff
# Fold-wise accuracy difference: ad_svm_fit_re (defined earlier in the file)
# minus the node-3 TDA-assisted SVM fold accuracies
diff_tda_pca_5.40.5_svm_n3_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n3_svm_fit_re
diff_tda_pca_5.40.5_svm_n3_3_fold
##        Accuracy
## 1 -0.0079183350
## 2  0.0006824661
## 3 -0.0063443209
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-3 fold-wise differences.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm against
# the definition of BayesianSignTest().
bst_tda_pca_5.40.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight. In the recorded run both are 0, so the
# ratio is 0/0 and prints as NaN.
bst_tda_pca_5.40.5_svm.n3_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n3_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n3_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the node-3 fold-wise differences, called with
# the same -0.01 / 0.01 pair as the sign test
bsr_tda_pca_5.40.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-3 fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# 0.1 is the value usually paired with 10-fold CV, whereas these differences
# come from 3 folds -- confirm it is intended.
bct_tda_pca_5.40.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n3_3_fold
## $left
## [1] 0.1074374
## 
## $rope
## [1] 0.8718418
## 
## $right
## [1] 0.02072074
# ROPE plot for the node-3 3-fold accuracy differences, using the interval
# [-0.01, 0.01] as the second argument to rope()
rope(diff_tda_pca_5.40.5_svm_n3_3_fold, c(-0.01, 0.01)) |>
  plot()

# Bayes factor (kept disabled)
#bf_tda_pca_5.40.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold))
#bf_tda_pca_5.40.5_rf.n3_3_fold

# One-sample t-test of the fold-wise differences (default null: mean equal to 0)
diff_tda_pca_5.40.5_svm_n3_3_fold |>
  as.matrix() |>
  t.test()
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold)
## t = -1.7121, df = 2, p-value = 0.229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.015902663  0.006849203
## sample estimates:
##   mean of x 
## -0.00452673
### Test set diff
# svm_cf_ov_acc (defined earlier in the file; presumably the baseline SVM's
# test accuracy -- confirm) minus the node-3 TDA-assisted SVM test accuracy
diff_tda_pca_5.40.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n3_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n3_test
##   Accuracy 
## 0.09101147
## Bayesian Tests Test set diff

# Bayesian sign test on the node-3 test-set difference.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm against
# the definition of BayesianSignTest().
# NOTE(review): the input is a single difference; confirm the test is
# meaningful with one observation.
bst_tda_pca_5.40.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight (NaN if both are 0, Inf if only probRight is 0)
bst_tda_pca_5.40.5_svm.n3_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n3_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n3_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-3 test-set difference, called with
# the same -0.01 / 0.01 pair as the sign test
bsr_tda_pca_5.40.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1599667
## 
## $winRight
## [1] 0.8400333
# Bayesian correlated t-test on the node-3 test-set difference.
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# 0.1 is the value usually paired with 10-fold CV -- confirm it is intended.
# NOTE(review): the recorded run returns NA for left/rope/right, presumably
# because a single difference has no variance estimate.
bct_tda_pca_5.40.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n3_test))

#BayesFactor
#bf_tda_pca_5.40.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n3_test)) #bf_tda_pca_5.40.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n3_test))


##Node4

# Fit a radial-kernel SVM (caret method "svmRadial") on the node-4 TDA
# training subset, centering and scaling the predictors and selecting the
# tuning parameters from svmGrid by accuracy. fitControl and svmGrid are
# defined earlier in the file.
#
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable,
# and `preProcess` is written in full instead of relying on partial
# matching of `preProc`.
# NOTE(review): `Importance` is kept as in the original; it does not look like
# an argument used by svmRadial -- confirm whether it can be dropped.
# NOTE(review): the recorded run warns about a zero-variance predictor;
# adding "zv" to preProcess would drop it, but would also change the fit.
Adult_TDA_PC_5.40.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted caret model: resampling results per (sigma, C) pair and
# the tuning values selected for the final model
Adult_TDA_PC_5.40.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 14818 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9878, 9879, 9879 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.9574841  0.011349211
##    0.1   0.50  0.9571468  0.054829463
##    0.1   0.75  0.9572143  0.081524431
##    0.1   1.00  0.9575518  0.104946524
##    0.1   1.25  0.9570794  0.110595159
##    1.0   0.25  0.9574167  0.016688355
##    1.0   0.50  0.9576866  0.049913574
##    1.0   0.75  0.9577541  0.058076344
##    1.0   1.00  0.9578891  0.076265471
##    1.0   1.25  0.9577541  0.087911776
##   10.0   0.25  0.9574167  0.000000000
##   10.0   0.50  0.9574842  0.008707248
##   10.0   0.75  0.9574167  0.014188683
##   10.0   1.00  0.9573492  0.019694916
##   10.0   1.25  0.9570118  0.024382482
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1 and C = 1.
# Per-fold resampling metrics of the selected node-4 model
Adult_TDA_PC_5.40.5_n4_SvmFit0[["resample"]]
##    Accuracy      Kappa Resample
## 1 0.9572874 0.09422544    Fold1
## 2 0.9584936 0.08369420    Fold2
## 3 0.9578862 0.05087677    Fold3
# Keep only the fold-wise Accuracy column (still a one-column data frame)
ad_tda_pc_5.40.5_n4_svm_fit_re <-
  Adult_TDA_PC_5.40.5_n4_SvmFit0[["resample"]]["Accuracy"]

# Summary of the fitted object
summary(Adult_TDA_PC_5.40.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.40.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n4_SvmFit TDA-Assited Svm")

# Predict outcome using Adult_TDA_PC_5.40.5_n4_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.40.5_n4_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.40.5_n4_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.40.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  2211
##      >50K       3   141
##                                           
##                Accuracy : 0.7733          
##                  95% CI : (0.7649, 0.7816)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.00053         
##                                           
##                   Kappa : 0.0876          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.99960         
##             Specificity : 0.05995         
##          Pos Pred Value : 0.77026         
##          Neg Pred Value : 0.97917         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75891         
##    Detection Prevalence : 0.98526         
##       Balanced Accuracy : 0.52977         
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the node-4 test-set confusion matrix a second time (same object, same output as the print directly above)
ad_tda_pc_5.40.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  2211
##      >50K       3   141
##                                           
##                Accuracy : 0.7733          
##                  95% CI : (0.7649, 0.7816)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.00053         
##                                           
##                   Kappa : 0.0876          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.99960         
##             Specificity : 0.05995         
##          Pos Pred Value : 0.77026         
##          Neg Pred Value : 0.97917         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75891         
##    Detection Prevalence : 0.98526         
##       Balanced Accuracy : 0.52977         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-4 confusion matrix
ad_tda_pc_5.40.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.7733415233   0.0876324358   0.7649068502   0.7816129147   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   0.0005299571   0.0000000000
# Test-set accuracy, kept as a named length-one vector (first element of $overall)
ad_tda_pc_5.40.5_n4_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_svm_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.40.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99959547           0.05994898           0.77026185 
##       Neg Pred Value            Precision               Recall 
##           0.97916667           0.77026185           0.99959547 
##                   F1           Prevalence       Detection Rate 
##           0.87007042           0.75921376           0.75890663 
## Detection Prevalence    Balanced Accuracy 
##           0.98525799           0.52977222
# Precision, recall and F1 are elements 5 to 7 of $byClass; pick them by name
ad_tda_pc_5.40.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_svm_fit_re minus the node-4 folds, so a
# negative value means the node-4 model had the higher cross-validated accuracy.
# NOTE(review): ad_svm_fit_re is defined earlier in the file; presumably the
# per-fold accuracy of the non-TDA SVM - confirm.
diff_tda_pca_5.40.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n4_svm_fit_re
diff_tda_pca_5.40.5_svm_n4_3_fold
##     Accuracy
## 1 -0.1333864
## 2 -0.1331678
## 3 -0.1356962
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold differences.
# NOTE(review): -0.01 / 0.01 are presumably the ROPE bounds (the same interval
# is passed to rope() below) - confirm against the function definition.
bst_tda_pca_5.40.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test.
# probRight is 0 here, so the ratio evaluates to Inf.
bst_tda_pca_5.40.5_svm.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences
bsr_tda_pca_5.40.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n4_3_fold
## $winLeft
## [1] 0.99
## 
## $winRope
## [1] 0.01
## 
## $winRight
## [1] 0
# Bayesian correlated t-test.
# NOTE(review): the 0.1 is presumably the assumed between-fold correlation - confirm.
bct_tda_pca_5.40.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n4_3_fold
## $left
## [1] 0.9999717
## 
## $rope
## [1] 7.31671e-06
## 
## $right
## [1] 2.10061e-05
# ROPE plot of the fold differences over [-0.01, 0.01]
plot(rope(diff_tda_pca_5.40.5_svm_n4_3_fold, c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold))
#bf_tda_pca_5.40.5_rf.n4_3_fold

# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold)
## t = -165.78, df = 2, p-value = 3.638e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1375635 -0.1306034
## sample estimates:
##  mean of x 
## -0.1340835
### Test set diff
# Test-set accuracy difference: svm_cf_ov_acc minus the node-4 accuracy.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n4_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n4_test
##   Accuracy 
## 0.05364455
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference
bst_tda_pca_5.40.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (0 / 0.5 here)
bst_tda_pca_5.40.5_svm.n4_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n4_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference
bsr_tda_pca_5.40.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1574333
## 
## $winRight
## [1] 0.8425667
# Bayesian correlated t-test; with a single difference it returns NA for
# all three probabilities (see the output below).
bct_tda_pca_5.40.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_svm.n4_test))

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n4_test))
#bf_tda_pca_5.40.5_svm.n4_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n4_test))

##Node5

# Fit an RBF-kernel SVM (caret method "svmRadial") to the node-5 data
# `tda.m_adult_5.40.5.n5.vec`, tuning over `svmGrid` with the resampling scheme
# in `fitControl`, centering and scaling the predictors, and selecting the
# final model on accuracy.
# NOTE(review): this fit warns that V8.Husband (and, in some resamples, other
# columns) has zero variance and cannot be scaled. Adding "zv" to the
# pre-processing would drop such columns, but that changes the fitted model -
# confirm before doing so.
Adult_TDA_PC_5.40.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` is not a named argument of train(); it is
  # forwarded through `...` - confirm the SVM fitter actually uses it.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # Full argument name instead of relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret tuning summary (accuracy and kappa per sigma/C pair) for the node-5 SVM fit
Adult_TDA_PC_5.40.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12081 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8054, 8053, 8055 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa
##    0.1   0.25  0.9996689  0    
##    0.1   0.50  0.9996689  0    
##    0.1   0.75  0.9996689  0    
##    0.1   1.00  0.9996689  0    
##    0.1   1.25  0.9996689  0    
##    1.0   0.25  0.9996689  0    
##    1.0   0.50  0.9996689  0    
##    1.0   0.75  0.9996689  0    
##    1.0   1.00  0.9996689  0    
##    1.0   1.25  0.9996689  0    
##   10.0   0.25  0.9996689  0    
##   10.0   0.50  0.9996689  0    
##   10.0   0.75  0.9996689  0    
##   10.0   1.00  0.9996689  0    
##   10.0   1.25  0.9996689  0    
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 10 and C = 0.25.
# Per-fold resampling metrics (accuracy, kappa) of the selected sigma/C pair
Adult_TDA_PC_5.40.5_n5_SvmFit0[["resample"]]
##    Accuracy Kappa Resample
## 1 0.9997517     0    Fold1
## 2 0.9995035     0    Fold2
## 3 0.9997516     0    Fold3
# Keep only the Accuracy column (still a one-column data frame); it is
# subtracted fold by fold from ad_svm_fit_re further down.
ad_tda_pc_5.40.5_n5_svm_fit_re <-
  Adult_TDA_PC_5.40.5_n5_SvmFit0[["resample"]]["Accuracy"]

summary(Adult_TDA_PC_5.40.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.40.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n5_SvmFit TDA-Assited Svm")

# Score the test data with the node-5 SVM
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the test-set predictions against the observed labels
ad_tda_pc_5.40.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the node-5 test-set confusion matrix a second time (same object, same output as the print directly above)
ad_tda_pc_5.40.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-5 confusion matrix
ad_tda_pc_5.40.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Test-set accuracy, kept as a named length-one vector (first element of $overall)
ad_tda_pc_5.40.5_n5_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.40.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 are elements 5 to 7 of $byClass; pick them by name
ad_tda_pc_5.40.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_svm_fit_re minus the node-5 folds, so a
# negative value means the node-5 model had the higher cross-validated accuracy.
# NOTE(review): ad_svm_fit_re is defined earlier in the file; presumably the
# per-fold accuracy of the non-TDA SVM - confirm.
diff_tda_pca_5.40.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n5_svm_fit_re
diff_tda_pca_5.40.5_svm_n5_3_fold
##     Accuracy
## 1 -0.1758506
## 2 -0.1741777
## 3 -0.1775616
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold differences.
# NOTE(review): -0.01 / 0.01 are presumably the ROPE bounds (the same interval
# is passed to rope() below) - confirm against the function definition.
bst_tda_pca_5.40.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test.
# probRight is 0 here, so the ratio evaluates to Inf.
bst_tda_pca_5.40.5_svm.n5_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n5_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences
bsr_tda_pca_5.40.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n5_3_fold
## $winLeft
## [1] 0.9919667
## 
## $winRope
## [1] 0.008033333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test.
# NOTE(review): the 0.1 is presumably the assumed between-fold correlation - confirm.
bct_tda_pca_5.40.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n5_3_fold
## $left
## [1] 0.9999769
## 
## $rope
## [1] 4.708338e-06
## 
## $right
## [1] 1.841464e-05
# ROPE plot of the fold differences over [-0.01, 0.01]
plot(rope(diff_tda_pca_5.40.5_svm_n5_3_fold, c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold))
#bf_tda_pca_5.40.5_rf.n5_3_fold

# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold)
## t = -180.03, df = 2, p-value = 3.085e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1800664 -0.1716602
## sample estimates:
##  mean of x 
## -0.1758633
### Test set diff
# Test-set accuracy difference: svm_cf_ov_acc minus the node-5 accuracy.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n5_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n5_test
##   Accuracy 
## 0.06777232
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference
bst_tda_pca_5.40.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (0 / 0.5 here)
bst_tda_pca_5.40.5_svm.n5_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n5_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n5_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference
bsr_tda_pca_5.40.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1561667
## 
## $winRight
## [1] 0.8438333
# Bayesian correlated t-test; with a single difference it returns NA for
# all three probabilities (see the output below).
bct_tda_pca_5.40.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_svm.n5_test))

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n5_test))
#bf_tda_pca_5.40.5_svm.n5_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1


# Fit a radial-basis SVM through caret on the TDA/KDE node-1 training subset.
# The resampling scheme and tuning grid come from `fitControl` and `svmGrid`,
# which are defined elsewhere in this file. Predictors are centered and
# scaled, and the tuning parameters are selected on accuracy.
# NOTE(review): `Importance` does not appear to be a documented `train()`
# argument, so it is forwarded through `...`; confirm the svmRadial fit
# actually uses it.
# NOTE(review): the logged run warns that V14.Holand.Netherlands has zero
# variance in this subset; consider adding "zv" to `preProcess`.
Adult_TDA_KDE_5.40.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  Importance = TRUE, # `TRUE`, not `T`: `T` is an ordinary reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name instead of partial `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.40.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11838 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7892, 7893, 7891 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.8002198  0.35293821
##    0.1   0.50  0.8086672  0.40635211
##    0.1   0.75  0.8121308  0.43444500
##    0.1   1.00  0.8125533  0.44556808
##    0.1   1.25  0.8117085  0.44916807
##    1.0   0.25  0.7590813  0.11678424
##    1.0   0.50  0.7716675  0.19359877
##    1.0   0.75  0.7791863  0.24762956
##    1.0   1.00  0.7827345  0.27866952
##    1.0   1.25  0.7845083  0.30034452
##   10.0   0.25  0.7405812  0.00000000
##   10.0   0.50  0.7412569  0.00633962
##   10.0   0.75  0.7453960  0.03531713
##   10.0   1.00  0.7502112  0.06976379
##   10.0   1.25  0.7508026  0.08347198
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_KDE_5.40.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8193107 0.4667671    Fold1
## 2 0.8124208 0.4454626    Fold2
## 3 0.8059286 0.4244746    Fold3
# Keep the per-fold accuracy column (first column of `$resample`) of the
# KDE node-1 fit for the fold-wise comparison further down.
ad_tda_kde_5.40.5_n1_svm_fit_re <- Adult_TDA_KDE_5.40.5_n1_SvmFit0$resample[1]

# Summarise the KDE node-1 fit. The original summarised the PCA model
# (`Adult_TDA_PC_5.40.5_n1_SvmFit0`) here, a copy-paste slip from that section.
summary(Adult_TDA_KDE_5.40.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.40.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n1_SvmFit TDA-Assisted Svm")

# Score the test set with the KDE node-1 SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print
# the resulting confusion matrix with its statistics.
ad_tda_kde_5.40.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7044  1093
##      >50K     372  1259
##                                          
##                Accuracy : 0.85           
##                  95% CI : (0.8428, 0.857)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5418         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9498         
##             Specificity : 0.5353         
##          Pos Pred Value : 0.8657         
##          Neg Pred Value : 0.7719         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7211         
##    Detection Prevalence : 0.8330         
##       Balanced Accuracy : 0.7426         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.40.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7044  1093
##      >50K     372  1259
##                                          
##                Accuracy : 0.85           
##                  95% CI : (0.8428, 0.857)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5418         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9498         
##             Specificity : 0.5353         
##          Pos Pred Value : 0.8657         
##          Neg Pred Value : 0.7719         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7211         
##    Detection Prevalence : 0.8330         
##       Balanced Accuracy : 0.7426         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.40.5_n1_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.500205e-01   5.418375e-01   8.427843e-01   8.570472e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.658854e-108   6.127894e-79
# Keep the overall test accuracy (the first, "Accuracy", entry of `$overall`),
# then print the per-class statistics.
ad_tda_kde_5.40.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n1_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9498382            0.5352891            0.8656753 
##       Neg Pred Value            Precision               Recall 
##            0.7719191            0.8656753            0.9498382 
##                   F1           Prevalence       Detection Rate 
##            0.9058060            0.7592138            0.7211302 
## Detection Prevalence    Balanced Accuracy 
##            0.8330262            0.7425637
# Precision, recall and F1 are entries 5 to 7 of `$byClass`; select them by name.
ad_tda_kde_5.40.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Row-wise difference in per-fold accuracy: `ad_svm_fit_re` minus the
# KDE node-1 fit's resample accuracies.
diff_tda_kde_5.40.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n1_svm_fit_re
diff_tda_kde_5.40.5_svm_n1_3_fold
##      Accuracy
## 1 0.004590332
## 2 0.012905001
## 3 0.016261497
## Bayesian Tests 3-fold diff

# Bayesian sign test on the 3-fold differences; -0.01 and 0.01 are passed as
# the lower and upper bounds (presumably the ROPE limits).
bst_tda_kde_5.40.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test: probLeft / probRight.
bst_tda_kde_5.40.5_svm.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_svm.n1_3_fold[["probRight"]]
bst_tda_kde_5.40.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the 3-fold differences, same bounds
# (-0.01, 0.01) as the sign test.
bsr_tda_kde_5.40.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4883
## 
## $winRight
## [1] 0.5117
# Bayesian correlated t-test on the 3-fold differences; 0.1 is passed as the
# second argument, followed by the bounds -0.01 and 0.01.
# NOTE(review): 0.1 is presumably the fold-correlation term (rho). The usual
# recommendation is 1 / (number of folds), i.e. 1/3 for the 3-fold CV used
# here -- confirm whether 0.1 is intended.
bct_tda_kde_5.40.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n1_3_fold
## $left
## [1] 0.01686917
## 
## $rope
## [1] 0.3752071
## 
## $right
## [1] 0.6079237
# ROPE plot of the 3-fold accuracy differences, using the interval
# [-0.01, 0.01] as the region of practical equivalence.
plot(
  rope(
    diff_tda_kde_5.40.5_svm_n1_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled)
# bf_tda_kde_5.40.5_svm.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold))
# bf_tda_kde_5.40.5_svm.n1_3_fold

# Frequentist one-sample t-test on the same per-fold differences.
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold)
## t = 3.2436, df = 2, p-value = 0.08334
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003673783  0.026178336
## sample estimates:
##  mean of x 
## 0.01125228
### Test-set accuracy difference: `svm_cf_ov_acc` (presumably the non-TDA SVM)
### minus the accuracy of the TDA/KDE node-1 SVM.
diff_tda_kde_5.40.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n1_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n1_test
##   Accuracy 
## -0.0230344
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference; -0.01 and 0.01 are passed as
# the lower and upper bounds (presumably the ROPE limits).
# NOTE(review): the input here is a single accuracy difference, so the test
# sees only one observation.
bst_tda_kde_5.40.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n1_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test: probLeft / probRight.
# The logged value is Inf because probRight is 0 for this input.
bst_tda_kde_5.40.5_svm.n1_test_odds.left <-
  bst_tda_kde_5.40.5_svm.n1_test[["probLeft"]] /
  bst_tda_kde_5.40.5_svm.n1_test[["probRight"]]
bst_tda_kde_5.40.5_svm.n1_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, same bounds
# (-0.01, 0.01) as the sign test.
bsr_tda_kde_5.40.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n1_test
## $winLeft
## [1] 0.8404333
## 
## $winRope
## [1] 0.1595667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference; 0.1 is passed as the
# second argument (presumably the correlation term), followed by the bounds
# -0.01 and 0.01.
# NOTE(review): the logged result is NA for left/rope/right -- the input is a
# single value, so this call does not produce a usable posterior.
bct_tda_kde_5.40.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n1_test))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n1_test)) #bf_tda_kde_5.40.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n1_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a radial-basis SVM through caret on the TDA/KDE node-2 training subset.
# The resampling scheme and tuning grid come from `fitControl` and `svmGrid`,
# which are defined elsewhere in this file. Predictors are centered and
# scaled, and the tuning parameters are selected on accuracy.
# NOTE(review): `Importance` does not appear to be a documented `train()`
# argument, so it is forwarded through `...`; confirm the svmRadial fit
# actually uses it.
# NOTE(review): the logged run warns about zero-variance predictors in this
# subset (several V4.* education dummies, and in some resamples
# V7.Armed.Forces and V14.Holand.Netherlands); consider adding "zv" to
# `preProcess`.
Adult_TDA_KDE_5.40.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n2.vec,
  Importance = TRUE, # `TRUE`, not `T`: `T` is an ordinary reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name instead of partial `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.40.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11203 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7468, 7469, 7469 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa       
##    0.1   0.25  0.7882714  0.3760350510
##    0.1   0.50  0.7997861  0.4351806953
##    0.1   0.75  0.8021071  0.4536716548
##    0.1   1.00  0.8032677  0.4651262148
##    0.1   1.25  0.8038032  0.4710841314
##    1.0   0.25  0.7457826  0.1474372360
##    1.0   0.50  0.7630102  0.2421037553
##    1.0   0.75  0.7704190  0.2888410584
##    1.0   1.00  0.7737216  0.3180011267
##    1.0   1.25  0.7723825  0.3261746724
##   10.0   0.25  0.7194502  0.0004574013
##   10.0   0.50  0.7231992  0.0217294972
##   10.0   0.75  0.7275730  0.0543398943
##   10.0   1.00  0.7320360  0.0891427200
##   10.0   1.25  0.7332856  0.1076412483
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_KDE_5.40.5_n2_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7962517 0.4489763    Fold1
## 2 0.8031601 0.4727827    Fold3
## 3 0.8119979 0.4914934    Fold2
# Keep the per-fold accuracy column of the KDE node-2 fit for the fold-wise
# comparison further down.
# NOTE(review): in the logged output the rows come back as Fold1, Fold3, Fold2;
# confirm the baseline accuracies are in the same order before differencing.
ad_tda_kde_5.40.5_n2_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n2_SvmFit0$resample["Accuracy"]

# Summarise the KDE node-2 fit.
summary(Adult_TDA_KDE_5.40.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.40.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n2_SvmFit TDA-Assited Svm")
# Predict outcome using Adult_TDA_KDE_5.40.5_n2_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.40.5_n2_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.40.5_n2_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.40.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7044  1046
##      >50K     372  1306
##                                           
##                Accuracy : 0.8548          
##                  95% CI : (0.8477, 0.8618)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5599          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9498          
##             Specificity : 0.5553          
##          Pos Pred Value : 0.8707          
##          Neg Pred Value : 0.7783          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7211          
##    Detection Prevalence : 0.8282          
##       Balanced Accuracy : 0.7526          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the same confusion matrix already printed
# right after it was created; the output below is identical.
ad_tda_kde_5.40.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7044  1046
##      >50K     372  1306
##                                           
##                Accuracy : 0.8548          
##                  95% CI : (0.8477, 0.8618)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5599          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9498          
##             Specificity : 0.5553          
##          Pos Pred Value : 0.8707          
##          Neg Pred Value : 0.7783          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7211          
##    Detection Prevalence : 0.8282          
##       Balanced Accuracy : 0.7526          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n2_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.548321e-01   5.598899e-01   8.476893e-01   8.617626e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  7.044745e-121   1.943359e-71
# Overall test-set accuracy, selected by name (it is the first element).
ad_tda_kde_5.40.5_n2_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n2_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9498382            0.5552721            0.8707046 
##       Neg Pred Value            Precision               Recall 
##            0.7783075            0.8707046            0.9498382 
##                   F1           Prevalence       Detection Rate 
##            0.9085515            0.7592138            0.7211302 
## Detection Prevalence    Balanced Accuracy 
##            0.8282146            0.7525551
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_kde_5.40.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_svm_fit_re (presumably the non-TDA SVM's
# fold accuracies) minus the n2 TDA-assisted SVM's fold accuracies.
# NOTE(review): rows are paired by position, and the n2 resample table is
# recorded in the order Fold1, Fold3, Fold2 -- confirm ad_svm_fit_re uses the
# same fold order and splits before treating these as paired differences.
diff_tda_kde_5.40.5_svm_n2_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n2_svm_fit_re
diff_tda_kde_5.40.5_svm_n2_3_fold
##     Accuracy
## 1 0.02764935
## 2 0.02216564
## 3 0.01019219
# Bayesian tests on the 3-fold differences, with bounds -0.01 and 0.01

# Bayesian Sign Test

bst_tda_kde_5.40.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Ratio of the sign test's "left" probability to its "right" probability

bst_tda_kde_5.40.5_svm.n2_3_fold_odds.left <-
  bst_tda_kde_5.40.5_svm.n2_3_fold$probLeft /
  bst_tda_kde_5.40.5_svm.n2_3_fold$probRight
bst_tda_kde_5.40.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.03666667
## 
## $winRight
## [1] 0.9633333
# Bayesian Correlated t-test
# NOTE(review): the second argument (0.1) looks like the fold correlation rho;
# the usual heuristic for k-fold CV is 1/k, i.e. 1/3 for the 3-fold resampling
# reported for this model -- confirm against correlatedBayesianTtest()'s
# definition before relying on these probabilities.

bct_tda_kde_5.40.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n2_3_fold
## $left
## [1] 0.01858525
## 
## $rope
## [1] 0.09884104
## 
## $right
## [1] 0.8825737
# ROPE plot of the fold differences over the same interval
plot(rope(diff_tda_kde_5.40.5_svm_n2_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold))
#bf_tda_kde_5.40.5_svm.n2_3_fold

# Frequentist one-sample t-test on the same differences, for comparison
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold)
## t = 3.8808, df = 2, p-value = 0.06044
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.002174408  0.042179196
## sample estimates:
##  mean of x 
## 0.02000239
### Test set diff
# Test-set accuracy difference: svm_cf_ov_acc minus the n2 TDA-assisted SVM's
# accuracy (a negative value means the n2 model scored higher).
diff_tda_kde_5.40.5_svm.n2_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n2_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n2_test
##    Accuracy 
## -0.02784603
# Bayesian tests on the test-set difference
# NOTE(review): the input is a single number rather than a sample of paired
# differences, so every result below rests on one observation.

# Bayesian Sign Test

bst_tda_kde_5.40.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n2_test), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n2_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Ratio of the sign test's "left" probability to its "right" probability
# (probRight is 0 in the recorded run, hence the Inf below)

bst_tda_kde_5.40.5_svm.n2_test_odds.left <-
  bst_tda_kde_5.40.5_svm.n2_test$probLeft /
  bst_tda_kde_5.40.5_svm.n2_test$probRight
bst_tda_kde_5.40.5_svm.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n2_test
## $winLeft
## [1] 0.8419
## 
## $winRope
## [1] 0.1581
## 
## $winRight
## [1] 0
# Bayesian Correlated t-test
# NOTE(review): the recorded result is NA for all three probabilities,
# presumably because a variance cannot be estimated from one difference.

bct_tda_kde_5.40.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n2_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n2_test))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n2_test)) #bf_tda_kde_5.40.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n2_test))

##Node3

# Fit a radial-basis-kernel SVM (caret method "svmRadial") to the n3 training
# set, centering and scaling the predictors and tuning over svmGrid with the
# resampling scheme held in fitControl; the best model is chosen by accuracy.
# NOTE(review): the recorded warnings report zero-variance one-hot columns in
# this subset; consider whether they should be dropped before scaling.
Adult_TDA_KDE_5.40.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  # NOTE(review): `Importance` is not a documented train() argument; it is
  # passed on through `...` -- confirm it has any effect for svmRadial.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.40.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 10351 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6901, 6900, 6901 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7800214  0.368004396
##    0.1   0.50  0.7915178  0.426074193
##    0.1   0.75  0.7922908  0.439452215
##    0.1   1.00  0.7938364  0.449779851
##    0.1   1.25  0.7938366  0.453990057
##    1.0   0.25  0.7421504  0.165743935
##    1.0   0.50  0.7541301  0.236100174
##    1.0   0.75  0.7647571  0.294722585
##    1.0   1.00  0.7691044  0.322811763
##    1.0   1.25  0.7692976  0.334532494
##   10.0   0.25  0.7148102  0.002029263
##   10.0   0.50  0.7175152  0.023948703
##   10.0   0.75  0.7235050  0.065507851
##   10.0   1.00  0.7296880  0.109367278
##   10.0   1.25  0.7305574  0.124985006
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_KDE_5.40.5_n3_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7939130 0.4622528    Fold1
## 2 0.7959420 0.4506133    Fold3
## 3 0.7916546 0.4491040    Fold2
# Keep only the per-fold accuracy column (still a one-column data frame).
ad_tda_kde_5.40.5_n3_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n3_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.40.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.40.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n3_SvmFit TDA-Assited Svm")

# Predict test-set labels with the fitted n3 SVM
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test-set labels
ad_tda_kde_5.40.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7009  1102
##      >50K     407  1250
##                                           
##                Accuracy : 0.8455          
##                  95% CI : (0.8382, 0.8526)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5301          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9451          
##             Specificity : 0.5315          
##          Pos Pred Value : 0.8641          
##          Neg Pred Value : 0.7544          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7175          
##    Detection Prevalence : 0.8304          
##       Balanced Accuracy : 0.7383          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the same confusion matrix already printed
# right after it was created; the output below is identical.
ad_tda_kde_5.40.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7009  1102
##      >50K     407  1250
##                                           
##                Accuracy : 0.8455          
##                  95% CI : (0.8382, 0.8526)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5301          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9451          
##             Specificity : 0.5315          
##          Pos Pred Value : 0.8641          
##          Neg Pred Value : 0.7544          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7175          
##    Detection Prevalence : 0.8304          
##       Balanced Accuracy : 0.7383          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.455160e-01   5.300580e-01   8.381947e-01   8.526305e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.272474e-97   2.190137e-71
# Overall test-set accuracy, selected by name (it is the first element).
ad_tda_kde_5.40.5_n3_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9451187            0.5314626            0.8641351 
##       Neg Pred Value            Precision               Recall 
##            0.7543754            0.8641351            0.9451187 
##                   F1           Prevalence       Detection Rate 
##            0.9028145            0.7592138            0.7175471 
## Detection Prevalence    Balanced Accuracy 
##            0.8303645            0.7382906
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_kde_5.40.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_svm_fit_re (presumably the non-TDA SVM's
# fold accuracies) minus the n3 TDA-assisted SVM's fold accuracies.
# NOTE(review): rows are paired by position, and the n3 resample table is
# recorded in the order Fold1, Fold3, Fold2 -- confirm ad_svm_fit_re uses the
# same fold order and splits before treating these as paired differences.
diff_tda_kde_5.40.5_svm_n3_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n3_svm_fit_re
diff_tda_kde_5.40.5_svm_n3_3_fold
##     Accuracy
## 1 0.02998798
## 2 0.02938376
## 3 0.03053546
# Bayesian tests on the 3-fold differences, with bounds -0.01 and 0.01

# Bayesian Sign Test

bst_tda_kde_5.40.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Ratio of the sign test's "left" probability to its "right" probability

bst_tda_kde_5.40.5_svm.n3_3_fold_odds.left <-
  bst_tda_kde_5.40.5_svm.n3_3_fold$probLeft /
  bst_tda_kde_5.40.5_svm.n3_3_fold$probRight
bst_tda_kde_5.40.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009166667
## 
## $winRight
## [1] 0.9908333
# Bayesian Correlated t-test
# NOTE(review): the second argument (0.1) looks like the fold correlation rho;
# the usual heuristic for k-fold CV is 1/k, i.e. 1/3 for the 3-fold resampling
# reported for this model -- confirm against correlatedBayesianTtest()'s
# definition before relying on these probabilities.

bct_tda_kde_5.40.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n3_3_fold
## $left
## [1] 4.615823e-05
## 
## $rope
## [1] 0.0001386839
## 
## $right
## [1] 0.9998152
# ROPE plot of the fold differences over the same interval
plot(rope(diff_tda_kde_5.40.5_svm_n3_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold))
#bf_tda_kde_5.40.5_svm.n3_3_fold

# Frequentist one-sample t-test on the same differences, for comparison
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold)
## t = 90.105, df = 2, p-value = 0.0001231
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.02853800 0.03140013
## sample estimates:
##  mean of x 
## 0.02996907
### Test set diff
# Test-set accuracy difference: svm_cf_ov_acc minus the n3 TDA-assisted SVM's
# accuracy (a negative value means the n3 model scored higher).
diff_tda_kde_5.40.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n3_test
##    Accuracy 
## -0.01852989
# Bayesian tests on the test-set difference
# NOTE(review): the input is a single number rather than a sample of paired
# differences, so every result below rests on one observation.

# Bayesian Sign Test

bst_tda_kde_5.40.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n3_test), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Ratio of the sign test's "left" probability to its "right" probability
# (probRight is 0 in the recorded run, hence the Inf below)

bst_tda_kde_5.40.5_svm.n3_test_odds.left <-
  bst_tda_kde_5.40.5_svm.n3_test$probLeft /
  bst_tda_kde_5.40.5_svm.n3_test$probRight
bst_tda_kde_5.40.5_svm.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n3_test), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n3_test
## $winLeft
## [1] 0.5431667
## 
## $winRope
## [1] 0.4568333
## 
## $winRight
## [1] 0
# Bayesian Correlated t-test
# NOTE(review): the recorded result is NA for all three probabilities,
# presumably because a variance cannot be estimated from one difference.

bct_tda_kde_5.40.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n3_test))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n3_test)) #bf_tda_kde_5.40.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n3_test))

## Node 4

# Fit a radial-kernel SVM (caret method "svmRadial") on
# tda.m_kde_adult_5.40.5.n4.vec, with adult_df1 (coerced to a factor) as the
# response and every other column as a predictor. Predictors are centred and
# scaled, tuning runs over svmGrid under the resampling scheme in fitControl,
# and the winning model is chosen by accuracy.
# NOTE(review): `Importance` is not a named argument of caret::train(); it is
# forwarded through `...` to the underlying fitting routine. Confirm that it
# has any effect for "svmRadial".
Adult_TDA_KDE_5.40.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  Importance = TRUE, # TRUE rather than T: T is an ordinary, reassignable name
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full argument name, no partial matching
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted train object: resampled accuracy/kappa for every
# (sigma, C) pair in the tuning grid and the finally selected values.
Adult_TDA_KDE_5.40.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 8741 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5828, 5827, 5827 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa       
##    0.1   0.25  0.8230179  0.2720075106
##    0.1   0.50  0.8311406  0.3489216475
##    0.1   0.75  0.8351447  0.3842342770
##    0.1   1.00  0.8381190  0.4091741614
##    0.1   1.25  0.8388057  0.4208327509
##    1.0   0.25  0.8060863  0.0992490769
##    1.0   0.50  0.8126070  0.1800648190
##    1.0   0.75  0.8171832  0.2342191615
##    1.0   1.00  0.8177554  0.2620286475
##    1.0   1.25  0.8175266  0.2812772185
##   10.0   0.25  0.7970484  0.0008982693
##   10.0   0.50  0.7969338  0.0099492462
##   10.0   0.75  0.7965904  0.0241208494
##   10.0   1.00  0.7979634  0.0552925407
##   10.0   1.25  0.8008236  0.0909170063
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold accuracy and kappa for the selected tuning values.
# The rows are not in fold order (Fold1, Fold3, Fold2 in the recorded run).
Adult_TDA_KDE_5.40.5_n4_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8389976 0.4181952    Fold1
## 2 0.8438572 0.4422786    Fold3
## 3 0.8335621 0.4020245    Fold2
# Keep only the Accuracy column of the per-fold results (still a one-column
# data frame) for the 3-fold comparison further down.
# NOTE(review): rows stay in the order caret returned them; the later
# row-wise subtraction assumes ad_svm_fit_re uses the same fold order.
# Confirm.
ad_tda_kde_5.40.5_n4_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_SvmFit0$resample["Accuracy"]

# In the recorded run this prints only the length/class/mode of the
# underlying ksvm object.
summary(Adult_TDA_KDE_5.40.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.40.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n_SvmFit TDA-Assisted Svm")

# Score the held-out rows in adult.one_hot_df4Test with the node-4 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed adult_df1 labels
# (coerced to a factor) and print the resulting confusion matrix.
ad_tda_kde_5.40.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7135  1536
##      >50K     281   816
##                                           
##                Accuracy : 0.814           
##                  95% CI : (0.8061, 0.8217)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3779          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9621          
##             Specificity : 0.3469          
##          Pos Pred Value : 0.8229          
##          Neg Pred Value : 0.7438          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7304          
##    Detection Prevalence : 0.8877          
##       Balanced Accuracy : 0.6545          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time; the recorded
# output is identical to the print directly after the object was created.
ad_tda_kde_5.40.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7135  1536
##      >50K     281   816
##                                           
##                Accuracy : 0.814           
##                  95% CI : (0.8061, 0.8217)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3779          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9621          
##             Specificity : 0.3469          
##          Pos Pred Value : 0.8229          
##          Neg Pred Value : 0.7438          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7304          
##    Detection Prevalence : 0.8877          
##       Balanced Accuracy : 0.6545          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix as a named numeric vector
# (Accuracy, Kappa, accuracy interval bounds, null accuracy, p-values).
ad_tda_kde_5.40.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.139844e-01   3.778921e-01   8.061234e-01   8.216578e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.867829e-39  3.188120e-190
# Store the overall test-set accuracy. Selecting the "Accuracy" element by
# name returns the same named length-1 vector as taking the first element.
ad_tda_kde_5.40.5_n4_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_svm_cf0$overall["Accuracy"]

# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.40.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9621090            0.3469388            0.8228578 
##       Neg Pred Value            Precision               Recall 
##            0.7438469            0.8228578            0.9621090 
##                   F1           Prevalence       Detection Rate 
##            0.8870517            0.7592138            0.7304464 
## Detection Prevalence    Balanced Accuracy 
##            0.8876945            0.6545239
# Store precision, recall and F1. In the recorded byClass printout these are
# elements 5 to 7, selected here by name.
ad_tda_kde_5.40.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers

### 3-fold difference: ad_svm_fit_re (presumably the non-TDA SVM's per-fold
### accuracy) minus the node-4 TDA-assisted SVM's per-fold accuracy.
### The subtraction is row-wise, so a negative value means the node-4 fit
### scored higher in that row.

diff_tda_kde_5.40.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n4_svm_fit_re
diff_tda_kde_5.40.5_svm_n4_3_fold
##      Accuracy
## 1 -0.01509657
## 2 -0.01853145
## 3 -0.01137206
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three node-4 per-fold differences.
# The trailing -0.01 and 0.01 are passed positionally (presumably the ROPE
# bounds, matching the range used for the rope() plot -- confirm).
bst_tda_kde_5.40.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight. This is Inf whenever probRight is 0, as in
# the recorded run.
bst_tda_kde_5.40.5_svm.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_svm.n4_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the three node-4 per-fold differences, called
# with the same positional -0.01 / 0.01 pair as the sign test.
bsr_tda_kde_5.40.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n4_3_fold
## $winLeft
## [1] 0.8341333
## 
## $winRope
## [1] 0.1658667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the three node-4 per-fold differences.
# The second argument (0.1) is presumably the correlation parameter; confirm
# against the function's definition. The result has left, rope and right
# components.
bct_tda_kde_5.40.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n4_3_fold
## $left
## [1] 0.9143909
## 
## $rope
## [1] 0.08111185
## 
## $right
## [1] 0.00449723
# ROPE plot for the node-4 3-fold accuracy differences, using the region of
# practical equivalence [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.40.5_svm_n4_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled).
# bf_tda_kde_5.40.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold))
# bf_tda_kde_5.40.5_svm.n4_3_fold

# One-sample t-test of the same differences against a mean of 0.
# The argument expression is kept verbatim because t.test() echoes it in the
# `data:` line of its printout.
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold)
## t = -7.2558, df = 2, p-value = 0.01847
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.023894911 -0.006105149
## sample estimates:
##   mean of x 
## -0.01500003
### Test-set difference: svm_cf_ov_acc (presumably the non-TDA SVM's test
### accuracy) minus the node-4 TDA-assisted SVM's test accuracy.
### The result is a single named value.
diff_tda_kde_5.40.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n4_test
##   Accuracy 
## 0.01300164
## Bayesian Tests Test set diff

# Bayesian sign test on the single node-4 test-set difference.
# The trailing -0.01 and 0.01 are passed positionally (presumably the ROPE
# bounds, matching the range used for the rope() plots -- confirm).
bst_tda_kde_5.40.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight (0 in the recorded run, where probLeft is 0).
bst_tda_kde_5.40.5_svm.n4_test_odds.left <- with(
  bst_tda_kde_5.40.5_svm.n4_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_svm.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single node-4 test-set difference, called
# with the same positional -0.01 / 0.01 pair as the sign test.
bsr_tda_kde_5.40.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4563
## 
## $winRight
## [1] 0.5437
# Correlated Bayesian t-test on the single node-4 test-set difference.
# The second argument (0.1) is presumably the correlation parameter; confirm
# against the function's definition.
# NOTE(review): with only one difference the recorded result is NA for
# left, rope and right, so this call carries no information.
bct_tda_kde_5.40.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n4_test))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n4_test))
#bf_tda_kde_5.40.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n4_test))

## Node 5

# Radial-kernel SVM tuned through caret::train() using the shared `fitControl`
# resampling setup and the `svmGrid` tuning grid; predictors are centred and
# scaled before fitting.
# NOTE(review): this section is labelled node 5 and the fit is named `_n5_`,
# but the training data is `tda.m_kde_adult_5.40.5.n3.vec` -- confirm this is
# the intended subset and not a copy-paste leftover.
# NOTE(review): `Importance` is not a documented argument of caret::train();
# it is forwarded through `...` -- confirm it has any effect for svmRadial.
Adult_TDA_KDE_5.40.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  # TRUE instead of T: `T` is an ordinary variable that can be reassigned
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # full argument name instead of the partially matched `preProc`
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted caret object: resampled Accuracy/Kappa for every sigma/C
# combination and the finally selected tuning values.
Adult_TDA_KDE_5.40.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 10351 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6901, 6900, 6901 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7834030  0.377727295
##    0.1   0.50  0.7966385  0.441966977
##    0.1   0.75  0.7988604  0.458211743
##    0.1   1.00  0.7988602  0.465076255
##    0.1   1.25  0.7983771  0.467523999
##    1.0   0.25  0.7439856  0.170709502
##    1.0   0.50  0.7564482  0.245145274
##    1.0   0.75  0.7643703  0.294259198
##    1.0   1.00  0.7670755  0.320583744
##    1.0   1.25  0.7663992  0.329880049
##   10.0   0.25  0.7147135  0.001256598
##   10.0   0.50  0.7179982  0.026031011
##   10.0   0.75  0.7242777  0.067356230
##   10.0   1.00  0.7276590  0.097153223
##   10.0   1.25  0.7293979  0.117707166
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 0.75.
# Per-fold resampling results of the selected model
Adult_TDA_KDE_5.40.5_n5_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8014493 0.4641849    Fold1
## 2 0.7945523 0.4409574    Fold2
## 3 0.8005797 0.4694929    Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position so the extraction does not depend
# on the column order of `$resample`.
ad_tda_kde_5.40.5_n5_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.40.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.40.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n5_SvmFit TDA-Assited Svm")

# Score the test data with the node-5 fit
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7057  1229
##      >50K     359  1123
##                                         
##                Accuracy : 0.8374        
##                  95% CI : (0.83, 0.8447)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : < 2.2e-16     
##                                         
##                   Kappa : 0.4911        
##                                         
##  Mcnemar's Test P-Value : < 2.2e-16     
##                                         
##             Sensitivity : 0.9516        
##             Specificity : 0.4775        
##          Pos Pred Value : 0.8517        
##          Neg Pred Value : 0.7578        
##              Prevalence : 0.7592        
##          Detection Rate : 0.7225        
##    Detection Prevalence : 0.8483        
##       Balanced Accuracy : 0.7145        
##                                         
##        'Positive' Class :  <=50K        
## 
# NOTE(review): second print of the same confusion matrix object; its output
# is identical to the print directly after the object is created.
ad_tda_kde_5.40.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7057  1229
##      >50K     359  1123
##                                         
##                Accuracy : 0.8374        
##                  95% CI : (0.83, 0.8447)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : < 2.2e-16     
##                                         
##                   Kappa : 0.4911        
##                                         
##  Mcnemar's Test P-Value : < 2.2e-16     
##                                         
##             Sensitivity : 0.9516        
##             Specificity : 0.4775        
##          Pos Pred Value : 0.8517        
##          Neg Pred Value : 0.7578        
##              Prevalence : 0.7592        
##          Detection Rate : 0.7225        
##    Detection Prevalence : 0.8483        
##       Balanced Accuracy : 0.7145        
##                                         
##        'Positive' Class :  <=50K        
## 
# Overall statistics of the node-5 confusion matrix
ad_tda_kde_5.40.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.374283e-01   4.910761e-01   8.299596e-01   8.446953e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.017543e-79  1.994170e-105
# Test-set accuracy, picked by name instead of the positional index 1
ad_tda_kde_5.40.5_n5_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.40.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9515912            0.4774660            0.8516775 
##       Neg Pred Value            Precision               Recall 
##            0.7577598            0.8516775            0.9515912 
##                   F1           Prevalence       Detection Rate 
##            0.8988664            0.7592138            0.7224611 
## Detection Prevalence    Balanced Accuracy 
##            0.8482801            0.7145286
# Precision, recall and F1, picked by name instead of the positional range 5:7
ad_tda_kde_5.40.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers (node 5)

### 3-fold diff: per-fold accuracy of ad_svm_fit_re minus the node-5 fit

diff_tda_kde_5.40.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n5_svm_fit_re
diff_tda_kde_5.40.5_svm_n5_3_fold
##     Accuracy
## 1 0.02245175
## 2 0.03077348
## 3 0.02161034
## Bayesian tests on the 3-fold differences

# Bayesian sign test; -0.01 and 0.01 are passed as the lower/upper bounds

bst_tda_kde_5.40.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n5_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Sign-test odds: probLeft divided by probRight

bst_tda_kde_5.40.5_svm.n5_3_fold_odds.left <-
  bst_tda_kde_5.40.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_svm.n5_3_fold[["probRight"]]
bst_tda_kde_5.40.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the 3-fold differences (bounds -0.01 / 0.01)

bsr_tda_kde_5.40.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n5_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009633333
## 
## $winRight
## [1] 0.9903667
# Bayesian correlated t-test on the 3-fold differences
# (arguments after the data: 0.1, then the bounds -0.01 and 0.01)

bct_tda_kde_5.40.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n5_3_fold
## $left
## [1] 0.004603968
## 
## $rope
## [1] 0.01911759
## 
## $right
## [1] 0.9762784
# ROPE plot of the per-fold accuracy differences, using the interval
# [-0.01, 0.01] as the region of practical equivalence.
plot(
  rope(diff_tda_kde_5.40.5_svm_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_kde_5.40.5_svm.n5_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold))
# bf_tda_kde_5.40.5_svm.n5_3_fold

# One-sample t-test of the per-fold differences against a mean of zero
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold)
## t = 8.5305, df = 2, p-value = 0.01347
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01236316 0.03752723
## sample estimates:
##  mean of x 
## 0.02494519
### Test-set accuracy difference: svm_cf_ov_acc minus the node-5 TDA/KDE SVM
diff_tda_kde_5.40.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n5_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n5_test
##    Accuracy 
## -0.01044226
## Bayesian tests on the test-set difference

# Bayesian sign test; -0.01 and 0.01 are passed as the lower/upper bounds

bst_tda_kde_5.40.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n5_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Sign-test odds: probLeft divided by probRight.
# The recorded run shows probRight = 0 here, which is why the ratio prints Inf.

bst_tda_kde_5.40.5_svm.n5_test_odds.left <-
  bst_tda_kde_5.40.5_svm.n5_test[["probLeft"]] /
  bst_tda_kde_5.40.5_svm.n5_test[["probRight"]]
bst_tda_kde_5.40.5_svm.n5_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference (bounds -0.01 / 0.01)

bsr_tda_kde_5.40.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n5_test
## $winLeft
## [1] 0.5395333
## 
## $winRope
## [1] 0.4604667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Runs correlatedBayesianTtest on the test-set accuracy difference with
# arguments 0.1, -0.01 and 0.01, then prints the result.
# NOTE(review): the input is a single accuracy difference and all three
# components of the result print as NA -- presumably the test cannot estimate
# a variance from one observation; confirm against the definition of
# correlatedBayesianTtest before reporting this result.

bct_tda_kde_5.40.5_svm.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_svm.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n5_test))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n5_test)) #bf_tda_kde_5.40.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n5_test))


# Non-TDA-assisted baseline

# Tuning grid for nnet: every combination of 4 hidden-layer sizes and 3 weight
# decay values (12 candidates).
nn1Grid <- expand.grid(size = c(2, 3, 5, 7), decay = c(0.3, 0.5, 0.7))

# Neural network fitted through caret::train() on the one-hot training data,
# using the shared `fitControl` resampling setup.
# NOTE(review): `Importance` is not a documented argument of caret::train();
# it is forwarded through `...` -- confirm it has any effect for nnet.
# NOTE(review): the recorded log shows most fits ending with "stopped after
# 100 iterations" rather than "converged"; consider passing a larger `maxit`
# (and `trace = FALSE` to silence the per-iteration log) if that is unintended.
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  # TRUE instead of T: `T` is an ordinary variable that can be reassigned
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 11629.741848 
## iter  10 value 8404.633624
## iter  20 value 8388.183097
## iter  30 value 7855.269958
## iter  40 value 7767.391165
## iter  50 value 7699.378577
## iter  60 value 7625.326110
## iter  70 value 7548.196492
## iter  80 value 7496.139199
## iter  90 value 7437.034038
## iter 100 value 7032.426324
## final  value 7032.426324 
## stopped after 100 iterations
## # weights:  331
## initial  value 9617.027718 
## iter  10 value 8226.246530
## iter  20 value 7945.275879
## iter  30 value 7787.313967
## iter  40 value 7786.735790
## iter  50 value 7772.786512
## iter  60 value 7766.331137
## iter  70 value 7761.640925
## iter  80 value 7756.551714
## iter  90 value 7743.855341
## iter 100 value 7571.852751
## final  value 7571.852751 
## stopped after 100 iterations
## # weights:  551
## initial  value 17404.802175 
## iter  10 value 7852.363583
## iter  20 value 7710.813019
## iter  30 value 7574.318909
## iter  40 value 7567.511013
## final  value 7562.886549 
## converged
## # weights:  771
## initial  value 8637.960391 
## iter  10 value 7793.980609
## iter  20 value 7762.128399
## iter  30 value 7753.531953
## iter  40 value 7606.087030
## iter  50 value 7470.598248
## iter  60 value 7192.097323
## iter  70 value 6128.153922
## iter  80 value 5416.556859
## iter  90 value 5337.424079
## iter 100 value 5245.759319
## final  value 5245.759319 
## stopped after 100 iterations
## # weights:  221
## initial  value 12473.916184 
## iter  10 value 7817.841489
## iter  20 value 7663.791179
## iter  30 value 7545.517018
## iter  40 value 7131.514356
## iter  50 value 6389.955849
## iter  60 value 6146.064097
## iter  70 value 6046.621048
## iter  80 value 5776.427856
## iter  90 value 5172.590943
## iter 100 value 4861.025334
## final  value 4861.025334 
## stopped after 100 iterations
## # weights:  331
## initial  value 10057.242945 
## iter  10 value 7794.531043
## iter  20 value 7724.291615
## iter  30 value 7635.564786
## iter  40 value 7541.821329
## iter  50 value 7386.157301
## iter  60 value 7090.283971
## iter  70 value 6640.344493
## iter  80 value 6302.815978
## iter  90 value 5921.153938
## iter 100 value 5396.223730
## final  value 5396.223730 
## stopped after 100 iterations
## # weights:  551
## initial  value 8554.938739 
## iter  10 value 8327.545112
## iter  20 value 7811.830943
## iter  30 value 7732.170574
## iter  40 value 7678.131061
## iter  50 value 7641.448376
## iter  60 value 7578.609440
## iter  70 value 7531.663357
## iter  80 value 7363.672633
## iter  90 value 7163.632624
## iter 100 value 6579.085908
## final  value 6579.085908 
## stopped after 100 iterations
## # weights:  771
## initial  value 8554.093702 
## iter  10 value 8367.755536
## iter  20 value 8230.673865
## iter  30 value 7811.841864
## iter  40 value 7761.912442
## iter  50 value 7748.513221
## iter  60 value 7729.517835
## iter  70 value 7630.898565
## iter  80 value 7562.774584
## iter  90 value 7517.735599
## iter 100 value 7462.030694
## final  value 7462.030694 
## stopped after 100 iterations
## # weights:  221
## initial  value 9291.600833 
## iter  10 value 8367.661767
## iter  20 value 7778.924219
## iter  30 value 7764.922158
## iter  40 value 7758.759573
## iter  50 value 7757.261349
## iter  60 value 7753.229296
## iter  70 value 7597.879121
## iter  80 value 6950.157587
## iter  90 value 6630.883136
## iter 100 value 6395.412937
## final  value 6395.412937 
## stopped after 100 iterations
## # weights:  331
## initial  value 8588.096604 
## iter  10 value 8337.094544
## iter  20 value 7950.522211
## iter  30 value 7917.620212
## iter  40 value 7754.734913
## iter  50 value 7741.843526
## iter  60 value 7737.005111
## iter  70 value 7694.533294
## iter  80 value 7616.368507
## iter  90 value 7580.772246
## iter 100 value 7529.368489
## final  value 7529.368489 
## stopped after 100 iterations
## # weights:  551
## initial  value 16411.432178 
## iter  10 value 8233.252963
## iter  20 value 7909.276011
## iter  30 value 7790.411320
## iter  40 value 7778.469722
## iter  50 value 7742.951602
## iter  60 value 7718.946054
## iter  70 value 7649.591064
## iter  80 value 7539.163996
## iter  90 value 7279.465327
## iter 100 value 7002.242162
## final  value 7002.242162 
## stopped after 100 iterations
## # weights:  771
## initial  value 9106.391912 
## iter  10 value 7820.454887
## iter  20 value 7797.090646
## iter  30 value 7710.854306
## iter  40 value 7670.835505
## iter  50 value 7663.182137
## iter  60 value 7649.622968
## iter  70 value 7479.348118
## iter  80 value 7419.749332
## iter  90 value 7317.350304
## iter 100 value 7258.364718
## final  value 7258.364718 
## stopped after 100 iterations
## # weights:  221
## initial  value 9610.156462 
## iter  10 value 7777.692025
## iter  20 value 7750.966723
## iter  30 value 7729.098034
## iter  40 value 7724.805424
## iter  50 value 7633.047425
## iter  60 value 7577.524089
## iter  70 value 7521.032220
## iter  80 value 7459.905675
## iter  90 value 7055.410753
## iter 100 value 6326.634920
## final  value 6326.634920 
## stopped after 100 iterations
## # weights:  331
## initial  value 12558.499576 
## iter  10 value 8008.913934
## iter  20 value 7648.552327
## iter  30 value 7619.614699
## iter  40 value 7524.924373
## iter  50 value 7503.170436
## iter  60 value 7481.133194
## iter  70 value 7458.891382
## iter  80 value 7328.010216
## iter  90 value 7156.356219
## iter 100 value 6692.241542
## final  value 6692.241542 
## stopped after 100 iterations
## # weights:  551
## initial  value 12380.225994 
## iter  10 value 8330.717080
## iter  20 value 7694.428585
## iter  30 value 7642.208249
## iter  40 value 7600.030144
## iter  50 value 7584.819298
## iter  60 value 7516.492226
## iter  70 value 7421.911817
## iter  80 value 7274.808269
## iter  90 value 7159.658453
## iter 100 value 7046.420660
## final  value 7046.420660 
## stopped after 100 iterations
## # weights:  771
## initial  value 13826.246329 
## iter  10 value 8302.232523
## iter  20 value 7783.837203
## iter  30 value 7629.959015
## iter  40 value 7544.817231
## iter  50 value 7504.910246
## iter  60 value 7438.365335
## iter  70 value 7375.958992
## iter  80 value 7260.039222
## iter  90 value 7197.944703
## iter 100 value 7093.593100
## final  value 7093.593100 
## stopped after 100 iterations
## # weights:  221
## initial  value 10263.335172 
## iter  10 value 8099.749382
## iter  20 value 7732.083237
## iter  30 value 7727.159475
## iter  40 value 7725.351145
## iter  50 value 7579.836736
## iter  60 value 7456.055274
## iter  70 value 7376.318677
## iter  80 value 7216.584612
## iter  90 value 6727.779988
## iter 100 value 6573.321377
## final  value 6573.321377 
## stopped after 100 iterations
## # weights:  331
## initial  value 18598.467508 
## iter  10 value 7805.611217
## iter  20 value 7674.429830
## iter  30 value 7358.723049
## iter  40 value 6659.905358
## iter  50 value 6481.701448
## iter  60 value 5615.373567
## iter  70 value 5343.977708
## iter  80 value 5095.989787
## iter  90 value 4920.588587
## iter 100 value 4854.556482
## final  value 4854.556482 
## stopped after 100 iterations
## # weights:  551
## initial  value 9554.597505 
## iter  10 value 8006.342771
## iter  20 value 7731.092779
## iter  30 value 7616.209839
## iter  40 value 7563.661869
## iter  50 value 7557.079966
## iter  60 value 7555.398224
## iter  70 value 7555.100036
## iter  80 value 7414.717845
## iter  90 value 7276.553457
## iter 100 value 6596.539808
## final  value 6596.539808 
## stopped after 100 iterations
## # weights:  771
## initial  value 9678.867457 
## iter  10 value 8285.962852
## iter  20 value 7757.106135
## iter  30 value 7721.907600
## iter  40 value 7708.298546
## iter  50 value 7692.119572
## iter  60 value 7543.519797
## iter  70 value 7491.609604
## iter  80 value 7460.221053
## iter  90 value 7447.108429
## iter 100 value 7430.751410
## final  value 7430.751410 
## stopped after 100 iterations
## # weights:  221
## initial  value 9042.892929 
## iter  10 value 8361.529468
## iter  20 value 7796.041553
## iter  30 value 7695.816636
## iter  40 value 7584.217177
## iter  50 value 7581.378662
## iter  60 value 7551.814987
## iter  70 value 7536.800358
## iter  80 value 7518.963273
## iter  90 value 7490.596810
## iter 100 value 7445.584911
## final  value 7445.584911 
## stopped after 100 iterations
## # weights:  331
## initial  value 8465.620235 
## iter  10 value 7861.684726
## iter  20 value 7737.750755
## iter  30 value 7687.522558
## iter  40 value 7683.602069
## iter  50 value 7669.238169
## iter  60 value 7638.726016
## iter  70 value 7433.825827
## iter  80 value 6854.521323
## iter  90 value 6025.900778
## iter 100 value 5356.132255
## final  value 5356.132255 
## stopped after 100 iterations
## # weights:  551
## initial  value 20850.731604 
## iter  10 value 8264.244072
## iter  20 value 7729.178744
## iter  30 value 7710.808875
## iter  40 value 7632.348546
## iter  50 value 7591.523351
## iter  60 value 7533.278425
## iter  70 value 7352.848851
## iter  80 value 7206.304109
## iter  90 value 6714.379139
## iter 100 value 6110.177830
## final  value 6110.177830 
## stopped after 100 iterations
## # weights:  771
## initial  value 9122.089677 
## iter  10 value 8335.713744
## iter  20 value 7747.778253
## iter  30 value 7656.764533
## iter  40 value 7548.455025
## iter  50 value 7525.584014
## iter  60 value 7480.014032
## iter  70 value 7361.175370
## iter  80 value 6996.872457
## iter  90 value 6379.834448
## iter 100 value 5606.125951
## final  value 5606.125951 
## stopped after 100 iterations
## # weights:  221
## initial  value 9613.221925 
## final  value 8389.316789 
## converged
## # weights:  331
## initial  value 17225.352190 
## iter  10 value 7909.926849
## iter  20 value 7900.912417
## iter  30 value 7751.510095
## iter  40 value 7732.869739
## iter  50 value 7676.818554
## iter  60 value 7564.248472
## iter  70 value 7550.319442
## iter  80 value 7550.250664
## iter  90 value 7548.785735
## iter 100 value 7542.281246
## final  value 7542.281246 
## stopped after 100 iterations
## # weights:  551
## initial  value 9767.278558 
## iter  10 value 8221.098929
## iter  20 value 7699.981749
## iter  30 value 7697.199372
## iter  40 value 7670.696919
## iter  50 value 7657.530851
## iter  60 value 7619.676272
## iter  70 value 7404.564385
## iter  80 value 7385.851065
## iter  90 value 7108.911593
## iter 100 value 6717.570990
## final  value 6717.570990 
## stopped after 100 iterations
## # weights:  771
## initial  value 11207.863505 
## iter  10 value 7808.832604
## iter  20 value 7744.220896
## iter  30 value 7739.299506
## iter  40 value 7734.893387
## iter  50 value 7730.663360
## iter  60 value 7710.062795
## iter  70 value 7688.368847
## iter  80 value 7634.430710
## iter  90 value 7485.435662
## iter 100 value 7452.444331
## final  value 7452.444331 
## stopped after 100 iterations
## # weights:  221
## initial  value 8571.871616 
## iter  10 value 8390.760363
## iter  20 value 8389.355198
## iter  30 value 8389.338858
## iter  30 value 8389.338790
## iter  30 value 8389.338724
## final  value 8389.338724 
## converged
## # weights:  331
## initial  value 18374.046492 
## iter  10 value 8390.539328
## iter  20 value 8389.357308
## iter  30 value 8389.338840
## iter  30 value 8389.338778
## iter  30 value 8389.338760
## final  value 8389.338760 
## converged
## # weights:  551
## initial  value 8902.541198 
## iter  10 value 8345.608310
## iter  20 value 7844.059793
## iter  30 value 7778.170863
## iter  40 value 6956.605429
## iter  50 value 6067.809074
## iter  60 value 5718.071850
## iter  70 value 5369.389992
## iter  80 value 5189.885666
## iter  90 value 5004.665450
## iter 100 value 4937.034564
## final  value 4937.034564 
## stopped after 100 iterations
## # weights:  771
## initial  value 12523.715791 
## iter  10 value 8176.208141
## iter  20 value 7740.916425
## iter  30 value 7726.479510
## iter  40 value 7715.584892
## iter  50 value 7678.202144
## iter  60 value 7632.786379
## iter  70 value 7570.873765
## iter  80 value 7544.762233
## iter  90 value 7469.122873
## iter 100 value 7465.773503
## final  value 7465.773503 
## stopped after 100 iterations
## # weights:  221
## initial  value 8398.848615 
## iter  10 value 7762.582119
## iter  20 value 7672.792303
## iter  30 value 7519.264416
## iter  40 value 7436.261325
## iter  50 value 7387.471742
## iter  60 value 6509.652314
## iter  70 value 5855.832568
## iter  80 value 5494.715972
## iter  90 value 5258.104011
## iter 100 value 5113.593139
## final  value 5113.593139 
## stopped after 100 iterations
## # weights:  331
## initial  value 9068.512789 
## iter  10 value 8389.433189
## final  value 8389.426830 
## converged
## # weights:  551
## initial  value 8429.860299 
## iter  10 value 8096.097339
## iter  20 value 7751.620165
## iter  30 value 7677.004000
## iter  40 value 7629.781811
## iter  50 value 7583.169765
## iter  60 value 7542.074893
## iter  70 value 7411.631706
## iter  80 value 7370.989789
## iter  90 value 7347.794903
## iter 100 value 7215.506312
## final  value 7215.506312 
## stopped after 100 iterations
## # weights:  771
## initial  value 8987.929263 
## iter  10 value 7786.198804
## iter  20 value 7773.766467
## iter  30 value 7699.864446
## iter  40 value 7660.231585
## iter  50 value 7554.419407
## iter  60 value 7533.095787
## iter  70 value 7464.242833
## iter  80 value 7437.318014
## iter  90 value 7424.675389
## iter 100 value 7421.365856
## final  value 7421.365856 
## stopped after 100 iterations
## # weights:  221
## initial  value 15157.138791 
## iter  10 value 12584.329249
## final  value 12582.716271 
## converged
# Print the caret summary of the baseline (non-TDA) neural network: sample
# and predictor counts, the resampling scheme, and accuracy/kappa for every
# size/decay combination in the tuning grid.
adultNn1Fit
## Neural Network 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15195, 15196 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7919084  0.2951526
##   2     0.5    0.8008142  0.2630162
##   2     0.7    0.8199899  0.4250339
##   3     0.3    0.7956391  0.3331653
##   3     0.5    0.8195472  0.3758177
##   3     0.7    0.8025252  0.2665166
##   5     0.3    0.7952438  0.2718914
##   5     0.5    0.8195511  0.4093591
##   5     0.7    0.8016057  0.4044819
##   7     0.3    0.8144159  0.3853184
##   7     0.5    0.7980080  0.2762891
##   7     0.7    0.8102920  0.3821858
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold accuracy and kappa of the selected tuning combination.
adultNn1Fit$resample
##    Accuracy     Kappa Resample
## 1 0.8499408 0.5788401    Fold3
## 2 0.7929718 0.3048361    Fold2
## 3 0.8170571 0.3914254    Fold1
# Keep the per-fold accuracy of the selected model as a one-column data frame
# (Accuracy is the first column of the resample table).
ad_nn1_fit_re <- adultNn1Fit$resample["Accuracy"]

# Show the architecture and fitted weights of the final network.
summary(adultNn1Fit)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o 
## -0.57  0.00 -0.57
# Variable-importance plot for the baseline network, limited to the 25
# highest-ranked predictors. The feature count is passed by name so the call
# does not depend on argument position, and the title typo ("Assited") is
# corrected.
vip(adultNn1Fit, num_features = 25) + ggtitle("non-TDA-Assisted NN")

# Score the testing data with the model that was fitted on the training data.
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)

# Cross-tabulate predicted against observed labels to assess test performance.
# NOTE(review): the matrix printed below contains no ' >50K' predictions
# (Kappa = 0), so accuracy equals the no-information rate -- the fitted
# network behaves as a majority-class predictor here.
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics: accuracy with its confidence bounds, kappa,
# the no-information rate and the associated p-values.
nn1_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Store the overall test accuracy (a length-one named vector).
nn1_cf_ov_acc <- nn1_cf$overall["Accuracy"]

# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
nn1_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (positions 5 to 7 of byClass), selected by
# name so the intent is visible at the call site.
nn1_cf_pre_rec_f1 <- nn1_cf$byClass[c("Precision", "Recall", "F1")]

# With TDA PCA filter: 5 intervals, 50% overlap, 5 bins -- node 1.
# NOTE(review): the object names below encode "5.40.5" while this header says
# 50% overlap -- confirm which overlap setting was actually used.

# Neural network 1: tune an nnet classifier on the node-1 data, using the
# fitControl resampling setup and the nn1Grid tuning grid defined elsewhere in
# this file; the best combination is chosen by accuracy.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# nnet fit, which does not document an argument of that name -- presumably a
# no-op; verify before relying on it.
Adult_TDA_PC_5.40.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n1.vec,
  Importance = TRUE, # TRUE, not T: T is an ordinary variable and can be reassigned
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 837.414238 
## iter  10 value 103.737182
## iter  20 value 102.458842
## iter  30 value 101.774210
## iter  40 value 101.205567
## iter  50 value 101.090879
## iter  60 value 101.040971
## iter  70 value 101.010415
## final  value 101.010412 
## converged
## # weights:  331
## initial  value 955.671454 
## iter  10 value 110.207267
## iter  20 value 106.876041
## iter  30 value 103.439238
## iter  40 value 102.311889
## iter  50 value 101.743073
## iter  60 value 101.326195
## iter  70 value 101.082315
## iter  80 value 100.936649
## iter  90 value 100.676155
## iter 100 value 100.600236
## final  value 100.600236 
## stopped after 100 iterations
## # weights:  551
## initial  value 1152.397778 
## iter  10 value 120.126095
## iter  20 value 102.061519
## iter  30 value 101.900168
## iter  40 value 101.235515
## iter  50 value 100.599606
## iter  60 value 100.579420
## iter  70 value 100.359751
## iter  80 value 100.305178
## iter  90 value 100.293545
## iter 100 value 100.293154
## final  value 100.293154 
## stopped after 100 iterations
## # weights:  771
## initial  value 2564.700605 
## iter  10 value 108.886670
## iter  20 value 107.751749
## iter  30 value 107.597436
## iter  40 value 107.205297
## iter  50 value 107.067517
## iter  60 value 106.752081
## iter  70 value 106.668090
## iter  80 value 106.325166
## iter  90 value 100.056543
## iter 100 value 98.881307
## final  value 98.881307 
## stopped after 100 iterations
## # weights:  221
## initial  value 1518.400930 
## iter  10 value 114.467111
## iter  20 value 105.990971
## iter  30 value 105.639716
## iter  40 value 102.864557
## iter  50 value 102.682544
## iter  60 value 102.679633
## final  value 102.679433 
## converged
## # weights:  331
## initial  value 1805.136576 
## iter  10 value 117.247285
## iter  20 value 115.857565
## iter  30 value 104.826760
## iter  40 value 103.892989
## iter  50 value 103.860592
## iter  60 value 103.423156
## iter  70 value 102.226602
## iter  80 value 102.031741
## iter  90 value 102.022828
## iter 100 value 101.959261
## final  value 101.959261 
## stopped after 100 iterations
## # weights:  551
## initial  value 3714.982633 
## iter  10 value 111.367283
## iter  20 value 107.807802
## iter  30 value 103.105630
## iter  40 value 101.936700
## iter  50 value 101.936508
## final  value 101.936463 
## converged
## # weights:  771
## initial  value 1587.128988 
## iter  10 value 106.276624
## iter  20 value 103.096019
## iter  30 value 102.346959
## iter  40 value 101.792973
## iter  50 value 101.659114
## iter  60 value 101.658714
## iter  70 value 101.646745
## iter  80 value 101.450882
## iter  90 value 101.450503
## iter 100 value 101.450165
## final  value 101.450165 
## stopped after 100 iterations
## # weights:  221
## initial  value 1126.411930 
## iter  10 value 106.991142
## iter  20 value 105.538130
## iter  30 value 105.396706
## iter  40 value 105.379993
## final  value 105.379836 
## converged
## # weights:  331
## initial  value 1284.664180 
## iter  10 value 107.364980
## iter  20 value 105.779124
## iter  30 value 105.495241
## iter  40 value 105.369487
## iter  50 value 104.262306
## iter  60 value 103.113124
## iter  70 value 101.574485
## iter  80 value 100.850194
## iter  90 value 100.757096
## iter 100 value 100.729303
## final  value 100.729303 
## stopped after 100 iterations
## # weights:  551
## initial  value 1345.748224 
## iter  10 value 128.611918
## iter  20 value 108.625963
## iter  30 value 103.916031
## iter  40 value 103.322253
## iter  50 value 102.114127
## iter  60 value 102.043037
## iter  70 value 102.021153
## iter  80 value 102.018748
## final  value 102.018426 
## converged
## # weights:  771
## initial  value 2954.630951 
## iter  10 value 135.183190
## iter  20 value 113.917598
## iter  30 value 102.607083
## iter  40 value 102.409326
## iter  50 value 102.376939
## iter  60 value 101.229898
## iter  70 value 97.975112
## iter  80 value 97.634486
## iter  90 value 97.248608
## iter 100 value 96.848272
## final  value 96.848272 
## stopped after 100 iterations
## # weights:  221
## initial  value 1324.355656 
## iter  10 value 114.020386
## iter  20 value 106.782381
## iter  30 value 102.933330
## iter  40 value 102.719570
## iter  50 value 102.702615
## iter  60 value 102.701317
## iter  70 value 102.649590
## iter  80 value 101.161637
## iter  90 value 100.628071
## iter 100 value 97.923546
## final  value 97.923546 
## stopped after 100 iterations
## # weights:  331
## initial  value 635.770654 
## iter  10 value 108.579383
## iter  20 value 101.971074
## iter  30 value 101.433644
## iter  40 value 101.156302
## iter  50 value 100.157790
## iter  60 value 98.922572
## iter  70 value 96.410758
## iter  80 value 93.647664
## iter  90 value 91.096687
## iter 100 value 80.914526
## final  value 80.914526 
## stopped after 100 iterations
## # weights:  551
## initial  value 3515.505335 
## iter  10 value 264.456468
## iter  20 value 107.647054
## iter  30 value 101.771916
## iter  40 value 101.721508
## iter  50 value 101.645958
## iter  60 value 99.954168
## iter  70 value 95.862133
## iter  80 value 91.940617
## iter  90 value 85.688060
## iter 100 value 75.200350
## final  value 75.200350 
## stopped after 100 iterations
## # weights:  771
## initial  value 2098.950313 
## iter  10 value 109.947153
## iter  20 value 105.439669
## iter  30 value 104.001946
## iter  40 value 102.304321
## iter  50 value 100.464593
## iter  60 value 100.266391
## iter  70 value 100.227342
## iter  80 value 99.749155
## iter  90 value 92.229742
## iter 100 value 79.905920
## final  value 79.905920 
## stopped after 100 iterations
## # weights:  221
## initial  value 916.940332 
## iter  10 value 120.746402
## iter  20 value 105.883426
## iter  30 value 105.873279
## iter  40 value 105.871903
## iter  50 value 105.773813
## iter  60 value 105.642152
## iter  70 value 104.276453
## iter  80 value 102.190223
## iter  90 value 90.140219
## iter 100 value 84.688957
## final  value 84.688957 
## stopped after 100 iterations
## # weights:  331
## initial  value 2429.776282 
## iter  10 value 107.174018
## iter  20 value 106.562223
## iter  30 value 103.963901
## iter  40 value 103.140708
## iter  50 value 102.939349
## iter  60 value 102.916899
## iter  70 value 102.912359
## final  value 102.912295 
## converged
## # weights:  551
## initial  value 1371.765063 
## iter  10 value 109.196898
## iter  20 value 107.454391
## iter  30 value 103.505350
## iter  40 value 100.402249
## iter  50 value 99.285273
## iter  60 value 97.473061
## iter  70 value 94.914944
## iter  80 value 94.370787
## iter  90 value 92.912581
## iter 100 value 87.030523
## final  value 87.030523 
## stopped after 100 iterations
## # weights:  771
## initial  value 1601.927662 
## iter  10 value 107.331251
## iter  20 value 103.986272
## iter  30 value 101.910374
## iter  40 value 101.772244
## iter  50 value 101.460754
## iter  60 value 100.519089
## iter  70 value 99.040983
## iter  80 value 96.939028
## iter  90 value 95.214478
## iter 100 value 90.455163
## final  value 90.455163 
## stopped after 100 iterations
## # weights:  221
## initial  value 579.438455 
## iter  10 value 130.873640
## iter  20 value 105.392012
## iter  30 value 105.390702
## iter  40 value 105.388549
## final  value 105.388423 
## converged
## # weights:  331
## initial  value 1430.313237 
## iter  10 value 106.550253
## iter  20 value 104.473659
## iter  30 value 104.065273
## iter  40 value 104.062359
## iter  50 value 104.062179
## final  value 104.062173 
## converged
## # weights:  551
## initial  value 1315.360471 
## iter  10 value 110.479699
## iter  20 value 104.120376
## iter  30 value 104.009507
## iter  40 value 103.939309
## iter  50 value 102.987919
## iter  60 value 102.730819
## iter  70 value 102.485352
## iter  80 value 101.562792
## iter  90 value 98.741759
## iter 100 value 94.929681
## final  value 94.929681 
## stopped after 100 iterations
## # weights:  771
## initial  value 1099.917191 
## iter  10 value 107.821940
## iter  20 value 106.846983
## iter  30 value 102.715150
## iter  40 value 102.112818
## iter  50 value 100.777573
## iter  60 value 98.930876
## iter  70 value 98.647621
## iter  80 value 98.364021
## iter  90 value 97.314394
## iter 100 value 92.744542
## final  value 92.744542 
## stopped after 100 iterations
## # weights:  221
## initial  value 1759.907618 
## iter  10 value 125.824731
## iter  20 value 113.772065
## iter  30 value 108.526773
## iter  40 value 108.203546
## iter  50 value 108.198743
## iter  60 value 108.195749
## final  value 108.195483 
## converged
## # weights:  331
## initial  value 1290.108643 
## iter  10 value 110.637738
## iter  20 value 107.803030
## iter  30 value 107.735075
## iter  40 value 106.564554
## iter  50 value 106.563919
## iter  60 value 106.563127
## iter  70 value 106.223955
## iter  80 value 104.969804
## iter  90 value 103.257492
## iter 100 value 102.482960
## final  value 102.482960 
## stopped after 100 iterations
## # weights:  551
## initial  value 2123.086282 
## iter  10 value 192.810402
## iter  20 value 108.697960
## iter  30 value 108.167215
## iter  40 value 106.767132
## iter  50 value 105.767496
## iter  60 value 105.250709
## iter  70 value 105.117014
## iter  80 value 104.724324
## iter  90 value 104.228610
## iter 100 value 101.442802
## final  value 101.442802 
## stopped after 100 iterations
## # weights:  771
## initial  value 1450.708239 
## iter  10 value 110.050575
## iter  20 value 105.662264
## iter  30 value 104.901939
## iter  40 value 103.997111
## iter  50 value 97.951029
## iter  60 value 86.907843
## iter  70 value 81.635697
## iter  80 value 81.370504
## iter  90 value 81.158073
## iter 100 value 79.947698
## final  value 79.947698 
## stopped after 100 iterations
## # weights:  221
## initial  value 831.993347 
## iter  10 value 110.489318
## iter  20 value 110.022006
## iter  30 value 106.597803
## iter  40 value 105.663163
## iter  50 value 105.577736
## iter  60 value 105.289331
## iter  70 value 105.250240
## iter  80 value 105.249290
## final  value 105.243768 
## converged
## # weights:  331
## initial  value 2233.767871 
## iter  10 value 203.852069
## iter  20 value 115.899507
## iter  30 value 109.116812
## iter  40 value 108.505379
## iter  50 value 106.333694
## iter  60 value 106.211646
## iter  70 value 104.355052
## iter  80 value 103.433913
## iter  90 value 102.240148
## iter 100 value 94.197182
## final  value 94.197182 
## stopped after 100 iterations
## # weights:  551
## initial  value 1563.857258 
## iter  10 value 111.270549
## iter  20 value 110.792524
## iter  30 value 108.660709
## iter  40 value 108.601860
## iter  50 value 108.192868
## iter  60 value 107.524965
## iter  70 value 107.132521
## iter  80 value 106.259084
## iter  90 value 105.821881
## iter 100 value 105.590769
## final  value 105.590769 
## stopped after 100 iterations
## # weights:  771
## initial  value 4751.830627 
## iter  10 value 110.264762
## iter  20 value 109.470304
## iter  30 value 106.304656
## iter  40 value 105.491086
## iter  50 value 105.025467
## iter  60 value 104.279570
## iter  70 value 100.033385
## iter  80 value 98.683845
## iter  90 value 95.235222
## iter 100 value 85.113444
## final  value 85.113444 
## stopped after 100 iterations
## # weights:  221
## initial  value 2009.149001 
## iter  10 value 116.051987
## iter  20 value 113.549227
## iter  30 value 111.324634
## iter  40 value 110.200185
## iter  50 value 110.129455
## iter  60 value 109.855425
## iter  70 value 107.799523
## iter  80 value 106.635389
## iter  90 value 106.617977
## final  value 106.617520 
## converged
## # weights:  331
## initial  value 2182.015305 
## iter  10 value 147.402476
## iter  20 value 109.363777
## iter  30 value 107.769240
## iter  40 value 106.497484
## iter  50 value 106.463973
## iter  60 value 106.454040
## iter  70 value 106.367251
## iter  80 value 106.330648
## iter  90 value 106.328732
## final  value 106.328704 
## converged
## # weights:  551
## initial  value 3027.433426 
## iter  10 value 130.841226
## iter  20 value 110.518161
## iter  30 value 109.410107
## iter  40 value 108.132537
## iter  50 value 103.807011
## iter  60 value 103.695580
## iter  70 value 103.684989
## final  value 103.684948 
## converged
## # weights:  771
## initial  value 3101.627390 
## iter  10 value 112.425151
## iter  20 value 108.178211
## iter  30 value 107.365002
## iter  40 value 106.911531
## iter  50 value 105.088015
## iter  60 value 103.855799
## iter  70 value 102.152717
## iter  80 value 100.905060
## iter  90 value 98.636805
## iter 100 value 92.694069
## final  value 92.694069 
## stopped after 100 iterations
## # weights:  221
## initial  value 1305.010691 
## iter  10 value 162.404838
## iter  20 value 160.072617
## iter  30 value 159.254733
## iter  40 value 158.669017
## iter  50 value 158.663626
## iter  60 value 158.662565
## iter  70 value 158.662179
## iter  70 value 158.662177
## iter  70 value 158.662177
## final  value 158.662177 
## converged
# Print the caret summary of the node-1 network: sample and predictor counts,
# the resampling scheme, and accuracy/kappa for every size/decay combination.
# NOTE(review): every row of the table printed below has the same accuracy
# with Kappa = 0, so accuracy alone does not distinguish the tuning
# combinations for this node -- consider a metric that is sensitive to the
# minority class.
Adult_TDA_PC_5.40.5_n1_NN1Fit0
## Neural Network 
## 
## 3373 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 2248, 2249, 2249 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa
##   2     0.3    0.9922918  0    
##   2     0.5    0.9922918  0    
##   2     0.7    0.9922918  0    
##   3     0.3    0.9922918  0    
##   3     0.5    0.9922918  0    
##   3     0.7    0.9922918  0    
##   5     0.3    0.9922918  0    
##   5     0.5    0.9922918  0    
##   5     0.7    0.9922918  0    
##   7     0.3    0.9922918  0    
##   7     0.5    0.9922918  0    
##   7     0.7    0.9922918  0    
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold accuracy and kappa of the selected tuning combination (node 1).
Adult_TDA_PC_5.40.5_n1_NN1Fit0$resample
##    Accuracy Kappa Resample
## 1 0.9928826     0    Fold3
## 2 0.9919929     0    Fold2
## 3 0.9920000     0    Fold1
# Keep the per-fold accuracy of the node-1 model as a one-column data frame
# (Accuracy is the first column of the resample table).
ad_tda_pc_5.40.5_n1_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n1_NN1Fit0$resample["Accuracy"]

# Show the architecture and fitted weights of the final node-1 network.
summary(Adult_TDA_PC_5.40.5_n1_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.01     0.30     0.00     0.00     0.01     0.00     0.01     0.01 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.03     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.02    -0.03     0.01 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.01     0.00     0.00     0.00    -0.03     0.00     0.00     0.01 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.01     0.01     0.00     0.00     0.00     0.00    -0.02     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.01     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.01     0.00     0.01 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.04    -0.01     0.05     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.01     0.00     0.00 
##  b->o h1->o h2->o 
##  2.29  2.29  0.81
# Variable-importance plot for the node-1 TDA-assisted network, limited to the
# 25 highest-scoring features.
vip(Adult_TDA_PC_5.40.5_n1_NN1Fit0, num_features = 25L) +
  ggtitle("Adult_TDA_PCA_5.40.5_n1_NN1Fit TDA-Assisted NN")

# Predict the outcome for the held-out test data with the model fitted on the
# node-1 training subset.
pred0 <- predict(Adult_TDA_PC_5.40.5_n1_NN1Fit0, newdata = adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# `reference` is named explicitly so predictions and truth cannot be swapped.
ad_tda_pc_5.40.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Show the node-1 test-set confusion matrix and its statistics again.
print(ad_tda_pc_5.40.5_n1_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-1 confusion matrix.
ad_tda_pc_5.40.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the overall accuracy; selected by name so the extraction does not
# depend on the element order of `overall`.
ad_tda_pc_5.40.5_n1_nn1_cf0_ov_acc <- ad_tda_pc_5.40.5_n1_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the node-1 confusion matrix.
ad_tda_pc_5.40.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Keep precision, recall and F1 (positions 5:7 of `byClass` in the printout
# above), again selected by name instead of by position.
ad_tda_pc_5.40.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_nn1_fit_re minus the node-1 TDA-assisted
# resample accuracies (presumably ad_nn1_fit_re holds the non-TDA network's
# per-fold accuracies -- verify where it is defined).
diff_tda_pca_5.40.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n1_nn1_fit_re
print(diff_tda_pca_5.40.5_nn1_n1_3_fold)
##     Accuracy
## 1 -0.1429418
## 2 -0.1990210
## 3 -0.1749429
## Bayesian tests on the 3-fold differences (ROPE = [-0.01, 0.01])

# Bayesian sign test

bst_tda_pca_5.40.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.40.5_nn1.n1_3_fold)
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test (Inf when probRight is 0)

bst_tda_pca_5.40.5_nn1.n1_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n1_3_fold[["probRight"]]
print(bst_tda_pca_5.40.5_nn1.n1_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test

bsr_tda_pca_5.40.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.40.5_nn1.n1_3_fold)
## $winLeft
## [1] 0.9918667
## 
## $winRope
## [1] 0.008133333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test (second argument 0.1, then the ROPE bounds)

bct_tda_pca_5.40.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.40.5_nn1.n1_3_fold)
## $left
## [1] 0.9934541
## 
## $rope
## [1] 0.001336362
## 
## $right
## [1] 0.005209553
# ROPE plot of the fold-wise differences
plot(rope(diff_tda_pca_5.40.5_nn1_n1_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold))
#bf_tda_pca_5.40.5_nn1.n1_3_fold

# Frequentist one-sample t-test of the differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold)
## t = -10.608, df = 2, p-value = 0.00877
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2421875 -0.1024163
## sample estimates:
##  mean of x 
## -0.1723019
### Test set diff
# Test-set accuracy difference: nn1_cf_ov_acc minus the node-1 TDA-assisted
# test accuracy. This is a single value, so the tests below run on one
# observation.
diff_tda_pca_5.40.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n1_nn1_cf0_ov_acc
print(diff_tda_pca_5.40.5_nn1.n1_test)
##  Accuracy 
## 0.5184275
## Bayesian tests on the test-set difference (ROPE = [-0.01, 0.01])

# Bayesian sign test

bst_tda_pca_5.40.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n1_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.40.5_nn1.n1_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.40.5_nn1.n1_test_odds.left <-
  bst_tda_pca_5.40.5_nn1.n1_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n1_test[["probRight"]]
print(bst_tda_pca_5.40.5_nn1.n1_test_odds.left)
## [1] 0
# Bayesian signed-rank test

bsr_tda_pca_5.40.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n1_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.40.5_nn1.n1_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1583
## 
## $winRight
## [1] 0.8417
# Bayesian correlated t-test (second argument 0.1, then the ROPE bounds).
# NOTE(review): on this single difference the recorded result is NA for
# left/rope/right.

bct_tda_pca_5.40.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.40.5_nn1.n1_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n1_test)))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n1_test)) #bf_tda_pca_5.40.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

##Adult_TDA_PC_5.40.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.40.5.n2.vec, size=2, range = 0.6,, type='class')

#Neural Network 1
# Tune a caret "nnet" classifier on the node-2 TDA subset
# (tda.m_adult_5.40.5.n2.vec): resampling is controlled by fitControl, the
# candidate hyper-parameters come from nn1Grid, and the best candidate is
# chosen by accuracy.
# NOTE(review): `Importance` is not a documented argument of train() or nnet();
# it is forwarded through `...` and appears to have no effect -- confirm
# before relying on it or removing it.
Adult_TDA_PC_5.40.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  Importance = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4773.379182 
## iter  10 value 4573.862247
## iter  20 value 4422.199280
## iter  30 value 4195.569257
## iter  40 value 3947.376138
## iter  50 value 3903.420040
## iter  60 value 3855.397584
## iter  70 value 3774.068612
## iter  80 value 3725.873311
## iter  90 value 3697.575285
## iter 100 value 3681.810334
## final  value 3681.810334 
## stopped after 100 iterations
## # weights:  331
## initial  value 4811.534329 
## iter  10 value 4516.327001
## iter  20 value 4393.461012
## iter  30 value 4386.851485
## iter  40 value 4377.013909
## iter  50 value 4376.160326
## iter  60 value 4373.109590
## iter  70 value 4364.791141
## iter  80 value 4340.525366
## iter  90 value 4298.801950
## iter 100 value 4284.552370
## final  value 4284.552370 
## stopped after 100 iterations
## # weights:  551
## initial  value 4859.175205 
## iter  10 value 4506.902464
## iter  20 value 4481.102970
## iter  30 value 4407.885549
## iter  40 value 4395.514834
## iter  50 value 4385.493066
## iter  60 value 4383.370678
## iter  70 value 4335.136335
## iter  80 value 4334.000989
## iter  90 value 4327.772010
## iter 100 value 4317.925076
## final  value 4317.925076 
## stopped after 100 iterations
## # weights:  771
## initial  value 4659.816325 
## iter  10 value 4440.321513
## iter  20 value 4389.346615
## iter  30 value 4344.469838
## iter  40 value 4335.145320
## iter  50 value 4334.687493
## iter  60 value 4334.418156
## iter  70 value 4333.748007
## iter  80 value 4333.032004
## iter  90 value 4331.475343
## iter 100 value 4325.927923
## final  value 4325.927923 
## stopped after 100 iterations
## # weights:  221
## initial  value 4616.343195 
## iter  10 value 4578.629493
## iter  20 value 4400.889965
## iter  30 value 4397.401706
## iter  40 value 4386.098961
## iter  50 value 4383.491126
## iter  60 value 4379.290946
## iter  70 value 4378.346270
## iter  80 value 4377.889730
## iter  90 value 4377.083195
## iter 100 value 4376.035418
## final  value 4376.035418 
## stopped after 100 iterations
## # weights:  331
## initial  value 7057.303705 
## iter  10 value 4578.774283
## iter  20 value 4458.595263
## iter  30 value 4368.399393
## iter  40 value 4357.711850
## iter  50 value 4354.987641
## iter  60 value 4329.539156
## iter  70 value 4286.549563
## iter  80 value 4272.888069
## iter  90 value 4248.842678
## iter 100 value 4131.843513
## final  value 4131.843513 
## stopped after 100 iterations
## # weights:  551
## initial  value 4995.047097 
## iter  10 value 4477.276178
## iter  20 value 4455.638983
## iter  30 value 4319.471090
## iter  40 value 4308.357541
## iter  50 value 4283.242711
## iter  60 value 4205.724434
## iter  70 value 4173.539510
## iter  80 value 4120.736428
## iter  90 value 3988.239542
## iter 100 value 3877.695568
## final  value 3877.695568 
## stopped after 100 iterations
## # weights:  771
## initial  value 5892.314142 
## iter  10 value 4567.619904
## iter  20 value 4406.983745
## iter  30 value 4344.600405
## iter  40 value 4306.456495
## iter  50 value 4300.293258
## iter  60 value 4280.057034
## iter  70 value 4204.028272
## iter  80 value 4120.560136
## iter  90 value 3975.535280
## iter 100 value 3821.903844
## final  value 3821.903844 
## stopped after 100 iterations
## # weights:  221
## initial  value 5481.571387 
## iter  10 value 4565.125882
## iter  20 value 4435.418895
## iter  30 value 4421.170590
## iter  40 value 4402.514545
## iter  50 value 4290.903977
## iter  60 value 4142.893155
## iter  70 value 3987.056842
## iter  80 value 3954.378184
## iter  90 value 3873.060697
## iter 100 value 3736.662120
## final  value 3736.662120 
## stopped after 100 iterations
## # weights:  331
## initial  value 5167.625930 
## iter  10 value 4543.920596
## iter  20 value 4425.699659
## iter  30 value 4420.567451
## iter  40 value 4384.302969
## iter  50 value 4340.699921
## iter  60 value 4318.728314
## iter  70 value 4304.921397
## iter  80 value 4267.641274
## iter  90 value 4201.205850
## iter 100 value 4059.326861
## final  value 4059.326861 
## stopped after 100 iterations
## # weights:  551
## initial  value 4677.258014 
## iter  10 value 4578.176653
## iter  20 value 4415.844407
## iter  30 value 4398.886824
## iter  40 value 4395.798374
## iter  50 value 4393.942397
## iter  60 value 4375.899122
## iter  70 value 4312.346439
## iter  80 value 4285.460161
## iter  90 value 4280.519260
## iter 100 value 4276.863574
## final  value 4276.863574 
## stopped after 100 iterations
## # weights:  771
## initial  value 4678.855412 
## iter  10 value 4547.429166
## iter  20 value 4378.731244
## iter  30 value 4368.107494
## iter  40 value 4358.928862
## iter  50 value 4332.035149
## iter  60 value 4322.773138
## iter  70 value 4301.916958
## iter  80 value 4281.476377
## iter  90 value 4238.906771
## iter 100 value 4230.370126
## final  value 4230.370126 
## stopped after 100 iterations
## # weights:  221
## initial  value 6832.111609 
## iter  10 value 4553.000331
## iter  20 value 4534.804671
## iter  30 value 4395.579183
## iter  40 value 4381.684346
## final  value 4381.681933 
## converged
## # weights:  331
## initial  value 5749.589992 
## iter  10 value 4578.971771
## iter  20 value 4578.690670
## iter  30 value 4473.884516
## iter  40 value 4383.692124
## iter  50 value 4378.782016
## iter  60 value 4369.362247
## iter  70 value 4358.462098
## iter  80 value 4305.828352
## iter  90 value 4224.475100
## iter 100 value 4145.055475
## final  value 4145.055475 
## stopped after 100 iterations
## # weights:  551
## initial  value 5292.533345 
## iter  10 value 4433.272933
## iter  20 value 4390.867691
## iter  30 value 4389.804563
## iter  40 value 4312.302249
## iter  50 value 4273.190113
## iter  60 value 4264.515196
## iter  70 value 4262.091358
## iter  80 value 4256.335181
## iter  90 value 4252.670130
## iter 100 value 4248.097698
## final  value 4248.097698 
## stopped after 100 iterations
## # weights:  771
## initial  value 4714.350141 
## iter  10 value 4541.911043
## iter  20 value 4386.160148
## iter  30 value 4362.557462
## iter  40 value 4359.168329
## iter  50 value 4358.452600
## iter  60 value 4354.312594
## iter  70 value 4271.556184
## iter  80 value 4199.460175
## iter  90 value 4133.767243
## iter 100 value 4126.989203
## final  value 4126.989203 
## stopped after 100 iterations
## # weights:  221
## initial  value 4859.537003 
## iter  10 value 4483.120924
## iter  20 value 4387.428303
## iter  30 value 4385.359203
## iter  40 value 4374.733744
## iter  50 value 4340.685243
## iter  60 value 4272.647781
## iter  70 value 4205.327309
## iter  80 value 4148.969608
## iter  90 value 4077.799250
## iter 100 value 3957.466331
## final  value 3957.466331 
## stopped after 100 iterations
## # weights:  331
## initial  value 5962.177139 
## iter  10 value 4553.355943
## iter  20 value 4412.737951
## iter  30 value 4371.039169
## iter  40 value 4370.235871
## iter  50 value 4355.582810
## iter  60 value 4307.617631
## iter  70 value 4257.766846
## iter  80 value 4240.867806
## iter  90 value 4208.851579
## iter 100 value 4196.627102
## final  value 4196.627102 
## stopped after 100 iterations
## # weights:  551
## initial  value 5044.588995 
## iter  10 value 4472.690821
## iter  20 value 4423.382430
## iter  30 value 4329.689220
## iter  40 value 4323.114531
## iter  50 value 4320.589367
## iter  60 value 4316.891088
## iter  70 value 4313.329775
## iter  80 value 4310.524059
## iter  90 value 4238.416789
## iter 100 value 4219.600404
## final  value 4219.600404 
## stopped after 100 iterations
## # weights:  771
## initial  value 4722.981089 
## iter  10 value 4574.914100
## iter  20 value 4364.144824
## iter  30 value 4320.462400
## iter  40 value 4304.876400
## iter  50 value 4280.976530
## iter  60 value 4271.134943
## iter  70 value 4257.822495
## iter  80 value 4253.925701
## iter  90 value 4249.226413
## iter 100 value 4234.692118
## final  value 4234.692118 
## stopped after 100 iterations
## # weights:  221
## initial  value 6219.281095 
## iter  10 value 4569.094634
## iter  20 value 4441.055019
## iter  30 value 4377.237782
## iter  40 value 4327.533995
## iter  50 value 4283.239315
## iter  60 value 4262.744263
## iter  70 value 4258.689661
## iter  80 value 4255.431722
## iter  90 value 4241.516085
## iter 100 value 4193.733658
## final  value 4193.733658 
## stopped after 100 iterations
## # weights:  331
## initial  value 4695.736697 
## iter  10 value 4474.773951
## iter  20 value 4354.946419
## iter  30 value 4329.092426
## iter  40 value 4315.455269
## iter  50 value 4310.761826
## iter  60 value 4310.416498
## iter  70 value 4310.352843
## iter  80 value 4307.713727
## iter  90 value 4300.824696
## iter 100 value 4246.176570
## final  value 4246.176570 
## stopped after 100 iterations
## # weights:  551
## initial  value 4951.325686 
## iter  10 value 4555.671139
## iter  20 value 4393.185024
## iter  30 value 4299.391947
## iter  40 value 4272.332261
## iter  50 value 4255.963684
## iter  60 value 4254.470936
## iter  70 value 4248.365320
## iter  80 value 4242.363659
## iter  90 value 4215.625004
## iter 100 value 4131.616526
## final  value 4131.616526 
## stopped after 100 iterations
## # weights:  771
## initial  value 4995.921172 
## iter  10 value 4558.957036
## iter  20 value 4404.141057
## iter  30 value 4377.228778
## iter  40 value 4373.623531
## iter  50 value 4372.872522
## iter  60 value 4367.760492
## iter  70 value 4365.508721
## iter  80 value 4301.135187
## iter  90 value 4258.063902
## iter 100 value 4082.929451
## final  value 4082.929451 
## stopped after 100 iterations
## # weights:  221
## initial  value 4697.511328 
## iter  10 value 4380.950668
## iter  20 value 4336.729260
## iter  30 value 4275.191257
## iter  40 value 4225.520283
## iter  50 value 4153.684907
## iter  60 value 3903.150164
## iter  70 value 3774.072868
## iter  80 value 3718.120586
## iter  90 value 3679.214207
## iter 100 value 3649.695949
## final  value 3649.695949 
## stopped after 100 iterations
## # weights:  331
## initial  value 4599.888499 
## iter  10 value 4519.975025
## iter  20 value 4406.677936
## iter  30 value 4403.396172
## iter  40 value 4402.992786
## iter  50 value 4392.463114
## iter  60 value 4389.901480
## iter  70 value 4387.234680
## iter  80 value 4385.165207
## iter  90 value 4383.556294
## iter 100 value 4378.063055
## final  value 4378.063055 
## stopped after 100 iterations
## # weights:  551
## initial  value 5440.404732 
## iter  10 value 4549.130620
## iter  20 value 4397.150438
## iter  30 value 4391.197912
## iter  40 value 4378.952086
## iter  50 value 4330.150196
## iter  60 value 4319.792731
## iter  70 value 4307.641637
## iter  80 value 4286.033743
## iter  90 value 4267.214390
## iter 100 value 4193.003651
## final  value 4193.003651 
## stopped after 100 iterations
## # weights:  771
## initial  value 4639.208815 
## iter  10 value 4414.515064
## iter  20 value 4350.671968
## iter  30 value 4258.974945
## iter  40 value 4251.454417
## iter  50 value 4241.434615
## iter  60 value 4187.853729
## iter  70 value 4111.386165
## iter  80 value 3979.772354
## iter  90 value 3879.682819
## iter 100 value 3768.818713
## final  value 3768.818713 
## stopped after 100 iterations
## # weights:  221
## initial  value 4926.176857 
## iter  10 value 4559.509528
## iter  20 value 4534.342183
## iter  30 value 4534.232315
## final  value 4534.232243 
## converged
## # weights:  331
## initial  value 6102.832679 
## iter  10 value 4578.801481
## iter  20 value 4578.591163
## iter  30 value 4384.112953
## iter  40 value 4373.291937
## iter  50 value 4371.932384
## iter  60 value 4363.108704
## iter  70 value 4360.574028
## iter  80 value 4319.285512
## iter  90 value 4304.409146
## iter 100 value 4240.338029
## final  value 4240.338029 
## stopped after 100 iterations
## # weights:  551
## initial  value 5097.738552 
## iter  10 value 4478.069276
## iter  20 value 4434.236068
## iter  30 value 4406.984621
## iter  40 value 4397.624476
## iter  50 value 4335.825898
## iter  60 value 4303.712333
## iter  70 value 4249.426709
## iter  80 value 4247.263253
## iter  90 value 4230.932156
## iter 100 value 4225.478552
## final  value 4225.478552 
## stopped after 100 iterations
## # weights:  771
## initial  value 8081.060377 
## iter  10 value 4691.171290
## iter  20 value 4471.418174
## iter  30 value 4364.472584
## iter  40 value 4312.302809
## iter  50 value 4281.957898
## iter  60 value 4275.473877
## iter  70 value 4271.771033
## iter  80 value 4263.638006
## iter  90 value 4245.440283
## iter 100 value 4207.403859
## final  value 4207.403859 
## stopped after 100 iterations
## # weights:  221
## initial  value 4604.865846 
## iter  10 value 4578.480010
## iter  20 value 4570.901270
## iter  30 value 4521.880368
## iter  40 value 4399.513607
## iter  50 value 4385.637429
## iter  60 value 4381.983065
## iter  70 value 4310.405644
## iter  80 value 4273.692408
## iter  90 value 4246.976082
## iter 100 value 4156.577599
## final  value 4156.577599 
## stopped after 100 iterations
## # weights:  331
## initial  value 4888.154951 
## iter  10 value 4481.277947
## iter  20 value 4415.365045
## iter  30 value 4408.254736
## iter  40 value 4407.464298
## iter  50 value 4404.005895
## iter  60 value 4403.476040
## iter  70 value 4383.063550
## iter  80 value 4350.736440
## iter  90 value 4323.261265
## iter 100 value 4249.457299
## final  value 4249.457299 
## stopped after 100 iterations
## # weights:  551
## initial  value 8004.739127 
## iter  10 value 4579.675390
## iter  20 value 4417.973158
## iter  30 value 4407.830764
## iter  40 value 4399.981308
## iter  50 value 4399.875305
## iter  60 value 4385.351351
## iter  70 value 4375.403665
## iter  80 value 4307.573317
## iter  90 value 4026.059251
## iter 100 value 3987.845128
## final  value 3987.845128 
## stopped after 100 iterations
## # weights:  771
## initial  value 4667.715329 
## iter  10 value 4415.812118
## iter  20 value 4374.879811
## iter  30 value 4352.899860
## iter  40 value 4306.365050
## iter  50 value 4296.930133
## iter  60 value 4266.457461
## iter  70 value 4244.253660
## iter  80 value 4240.544006
## iter  90 value 4238.953900
## iter 100 value 4236.683826
## final  value 4236.683826 
## stopped after 100 iterations
## # weights:  221
## initial  value 7194.655172 
## iter  10 value 6705.695442
## iter  20 value 6461.267374
## iter  30 value 6420.878980
## iter  40 value 6384.852355
## iter  50 value 6251.085130
## iter  60 value 6135.787816
## iter  70 value 6015.381926
## iter  80 value 5956.915998
## iter  90 value 5825.944075
## iter 100 value 5745.030675
## final  value 5745.030675 
## stopped after 100 iterations
# Show the resampling results across the tuning grid and the selected model.
print(Adult_TDA_PC_5.40.5_n2_NN1Fit0)
## Neural Network 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6851, 6851, 6850 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   2     0.3    0.6729236  0.24028243
##   2     0.5    0.6348797  0.10677450
##   2     0.7    0.6414004  0.11773175
##   3     0.3    0.6251474  0.08292598
##   3     0.5    0.6213487  0.05674173
##   3     0.7    0.6338092  0.11008402
##   5     0.3    0.6108408  0.00000000
##   5     0.5    0.6426656  0.13093242
##   5     0.7    0.6483015  0.19178607
##   7     0.3    0.6460636  0.14700438
##   7     0.5    0.6413988  0.15756464
##   7     0.7    0.6339065  0.09782916
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.3.
# Per-fold accuracy and kappa of the selected node-2 model.
Adult_TDA_PC_5.40.5_n2_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.7095738 0.3690617    Fold3
## 2 0.6108029 0.0000000    Fold2
## 3 0.6983942 0.3517856    Fold1
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_pc_5.40.5_n2_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n2_NN1Fit0[["resample"]]["Accuracy"]

# Print the architecture and weights of the fitted network.
summary(Adult_TDA_PC_5.40.5_n2_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.03     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.02     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.03     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -1.33     0.01    -0.46    -1.05     0.25     0.00    -0.89    -0.82 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.99     0.31     0.34     0.00    -0.51    -0.54    -0.45    -0.96 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##    -0.34     0.29    -0.69     1.10     1.22     0.67    -0.19     0.46 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -1.35     0.00    -0.34     0.31    -0.26    -1.42    -0.52     1.70 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##    -0.13    -0.54    -0.24    -0.18    -0.46    -1.32    -0.01     0.88 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.48     2.14    -0.27    -0.75    -0.43     0.00     0.10    -0.20 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -0.36    -0.97     0.80     3.09    -1.51    -0.80    -0.24    -0.54 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -1.34     1.05     0.87    -3.46    -0.97     1.18    -1.32    -0.01 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00    -0.01     0.20    -1.28    -0.45     1.68     0.79 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.46     0.66    -0.81    -0.04    -0.64    -1.49    -0.98     1.42 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##    -0.01     0.13     0.00     0.00     0.11    -0.12     2.06     0.39 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -1.25     0.02    -0.54     0.37     0.16    -1.20    -0.13     0.09 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##    -0.33    -1.58     0.34     0.15    -0.60    -0.68    -0.21     1.56 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.66    -0.22    -0.20     0.66     0.43 
##  b->o h1->o h2->o 
##  1.10  1.11 -3.10
# Variable-importance plot for the node-2 TDA-assisted network, limited to the
# 25 highest-scoring features.
vip(Adult_TDA_PC_5.40.5_n2_NN1Fit0, num_features = 25L) +
  ggtitle("Adult_TDA_PCA_5.40.5_n2_NN1Fit TDA-Assisted NN")

# Predict the outcome for the held-out test data with the model fitted on the
# node-2 training subset.
pred0 <- predict(Adult_TDA_PC_5.40.5_n2_NN1Fit0, newdata = adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# `reference` is named explicitly so predictions and truth cannot be swapped.
ad_tda_pc_5.40.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1023   386
##      >50K    6393  1966
##                                           
##                Accuracy : 0.306           
##                  95% CI : (0.2969, 0.3152)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.014          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1379          
##             Specificity : 0.8359          
##          Pos Pred Value : 0.7260          
##          Neg Pred Value : 0.2352          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1047          
##    Detection Prevalence : 0.1442          
##       Balanced Accuracy : 0.4869          
##                                           
##        'Positive' Class :  <=50K          
## 
# Show the node-2 test-set confusion matrix and its statistics again.
print(ad_tda_pc_5.40.5_n2_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1023   386
##      >50K    6393  1966
##                                           
##                Accuracy : 0.306           
##                  95% CI : (0.2969, 0.3152)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.014          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1379          
##             Specificity : 0.8359          
##          Pos Pred Value : 0.7260          
##          Neg Pred Value : 0.2352          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1047          
##    Detection Prevalence : 0.1442          
##       Balanced Accuracy : 0.4869          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-2 confusion matrix.
ad_tda_pc_5.40.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.30599918    -0.01398009     0.29686821     0.31524595     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Keep the overall accuracy; selected by name so the extraction does not
# depend on the element order of `overall`.
ad_tda_pc_5.40.5_n2_nn1_cf0_ov_acc <- ad_tda_pc_5.40.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the node-2 confusion matrix.
ad_tda_pc_5.40.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.1379450            0.8358844            0.7260468 
##       Neg Pred Value            Precision               Recall 
##            0.2351956            0.7260468            0.1379450 
##                   F1           Prevalence       Detection Rate 
##            0.2318414            0.7592138            0.1047297 
## Detection Prevalence    Balanced Accuracy 
##            0.1442465            0.4869147
# Keep precision, recall and F1 (positions 5:7 of `byClass` in the printout
# above), again selected by name instead of by position.
ad_tda_pc_5.40.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_nn1_fit_re minus the node-2 TDA-assisted
# resample accuracies (presumably ad_nn1_fit_re holds the non-TDA network's
# per-fold accuracies -- verify where it is defined).
diff_tda_pca_5.40.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n2_nn1_fit_re
print(diff_tda_pca_5.40.5_nn1_n2_3_fold)
##    Accuracy
## 1 0.1403669
## 2 0.1821689
## 3 0.1186630
## Bayesian tests on the 3-fold differences (ROPE = [-0.01, 0.01])

# Bayesian sign test

bst_tda_pca_5.40.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.40.5_nn1.n2_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.40.5_nn1.n2_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nn1.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n2_3_fold[["probRight"]]
print(bst_tda_pca_5.40.5_nn1.n2_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test

bsr_tda_pca_5.40.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.40.5_nn1.n2_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008833333
## 
## $winRight
## [1] 0.9911667
# Bayesian correlated t-test (second argument 0.1, then the ROPE bounds)

bct_tda_pca_5.40.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.40.5_nn1.n2_3_fold)
## $left
## [1] 0.00912915
## 
## $rope
## [1] 0.002757308
## 
## $right
## [1] 0.9881135
# ROPE plot of the fold-wise differences
plot(rope(diff_tda_pca_5.40.5_nn1_n2_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold))
#bf_tda_pca_5.40.5_nn1.n2_3_fold

# Frequentist one-sample t-test of the differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold)
## t = 7.8915, df = 2, p-value = 0.01568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.0668816 0.2272509
## sample estimates:
## mean of x 
## 0.1470663
### Test set diff
# Test-set accuracy gap: plain NN1 minus node-2 TDA-assisted NN1.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n2_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n2_test
##  Accuracy 
## 0.4532146
## Bayesian Tests Test set diff

# Bayesian sign test, ROPE = [-0.01, 0.01]

bst_tda_pca_5.40.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.40.5_nn1.n2_test_odds.left <- with(
  bst_tda_pca_5.40.5_nn1.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test, same ROPE

bsr_tda_pca_5.40.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1590667
## 
## $winRight
## [1] 0.8409333
# Bayesian correlated t-test, same ROPE. The printed result is NA for all
# three regions.
# NOTE(review): presumably because a spread cannot be estimated from a single
# difference -- confirm, and consider dropping this call.

bct_tda_pca_5.40.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n2_test))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n2_test)) #bf_tda_pca_5.40.5_nn1.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n2_test))


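## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## NOTE(review): object names in this section encode "5.40.5" -- confirm whether the overlap is 40% or 50%.
## Node 3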

#Neural Network 1
# Tune and fit a single-hidden-layer network (caret method "nnet") on the
# node-3 TDA subset, choosing the tuning combination from `nn1Grid` with the
# best resampled accuracy under `fitControl`.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitter; confirm that nnet actually uses it.
# NOTE(review): nearly every fit in the training log ends with "stopped after
# 100 iterations" (presumably nnet's default iteration cap), i.e. without
# converging -- consider passing a larger `maxit`.
Adult_TDA_PC_5.40.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 3955.295507 
## iter  10 value 3781.943343
## iter  20 value 3760.057532
## iter  30 value 3654.501332
## iter  40 value 3649.457264
## iter  50 value 3648.827611
## iter  60 value 3642.120553
## iter  70 value 3624.794168
## iter  80 value 3615.880371
## iter  90 value 3502.233978
## iter 100 value 3325.726199
## final  value 3325.726199 
## stopped after 100 iterations
## # weights:  331
## initial  value 4181.416680 
## iter  10 value 3850.364005
## iter  20 value 3745.159916
## iter  30 value 3737.552609
## iter  40 value 3627.031758
## iter  50 value 3593.481970
## iter  60 value 3332.411931
## iter  70 value 3010.873412
## iter  80 value 2648.391820
## iter  90 value 2593.348680
## iter 100 value 2538.454952
## final  value 2538.454952 
## stopped after 100 iterations
## # weights:  551
## initial  value 5218.511345 
## iter  10 value 3892.837730
## iter  20 value 3854.371530
## iter  30 value 3715.595410
## iter  40 value 3554.492140
## iter  50 value 3537.467117
## iter  60 value 3494.845286
## iter  70 value 3383.507163
## iter  80 value 3159.147010
## iter  90 value 2973.533466
## iter 100 value 2780.800458
## final  value 2780.800458 
## stopped after 100 iterations
## # weights:  771
## initial  value 5698.722132 
## iter  10 value 3927.905136
## iter  20 value 3894.361564
## iter  30 value 3688.134407
## iter  40 value 3656.564112
## iter  50 value 3533.001938
## iter  60 value 3452.471383
## iter  70 value 3353.692582
## iter  80 value 3020.222576
## iter  90 value 2638.843433
## iter 100 value 2612.652296
## final  value 2612.652296 
## stopped after 100 iterations
## # weights:  221
## initial  value 5896.567481 
## iter  10 value 3928.017530
## iter  20 value 3643.796812
## iter  30 value 3589.664385
## iter  40 value 3547.301092
## iter  50 value 3485.741323
## iter  60 value 3241.582642
## iter  70 value 2989.855725
## iter  80 value 2782.135961
## iter  90 value 2635.352857
## iter 100 value 2572.752285
## final  value 2572.752285 
## stopped after 100 iterations
## # weights:  331
## initial  value 5567.537509 
## iter  10 value 3812.586066
## iter  20 value 3687.558151
## iter  30 value 3643.701562
## iter  40 value 3618.142081
## iter  50 value 3578.220672
## iter  60 value 3545.890999
## iter  70 value 3509.955357
## iter  80 value 3466.087280
## iter  90 value 3403.195497
## iter 100 value 3216.321043
## final  value 3216.321043 
## stopped after 100 iterations
## # weights:  551
## initial  value 4184.778780 
## iter  10 value 3947.242389
## iter  20 value 3934.349997
## iter  30 value 3708.807366
## iter  40 value 3543.013547
## iter  50 value 3524.568237
## iter  60 value 3477.054528
## iter  70 value 3339.824447
## iter  80 value 3128.657139
## iter  90 value 3012.588971
## iter 100 value 2906.986228
## final  value 2906.986228 
## stopped after 100 iterations
## # weights:  771
## initial  value 4227.762468 
## iter  10 value 3945.756783
## iter  20 value 3818.532125
## iter  30 value 3668.774469
## iter  40 value 3649.576934
## iter  50 value 3643.491774
## iter  60 value 3577.263347
## iter  70 value 3570.988475
## iter  80 value 3563.821399
## iter  90 value 3561.826720
## iter 100 value 3507.158642
## final  value 3507.158642 
## stopped after 100 iterations
## # weights:  221
## initial  value 4326.630909 
## iter  10 value 3813.912066
## iter  20 value 3674.164419
## iter  30 value 3645.884100
## iter  40 value 3613.466391
## iter  50 value 3517.292569
## iter  60 value 3391.772524
## iter  70 value 2930.940476
## iter  80 value 2650.973297
## iter  90 value 2567.268074
## iter 100 value 2527.060518
## final  value 2527.060518 
## stopped after 100 iterations
## # weights:  331
## initial  value 4614.450119 
## iter  10 value 3734.967722
## iter  20 value 3624.396705
## iter  30 value 3585.221565
## iter  40 value 3551.099984
## iter  50 value 3547.394394
## iter  60 value 3512.050104
## iter  70 value 3504.932820
## iter  80 value 3500.826937
## iter  90 value 3493.637179
## iter 100 value 3491.744644
## final  value 3491.744644 
## stopped after 100 iterations
## # weights:  551
## initial  value 5324.107772 
## iter  10 value 3938.740484
## iter  20 value 3709.519382
## iter  30 value 3644.579210
## iter  40 value 3609.043519
## iter  50 value 3568.759201
## iter  60 value 3522.806709
## iter  70 value 3507.686808
## iter  80 value 3495.177385
## iter  90 value 3468.470583
## iter 100 value 3460.235365
## final  value 3460.235365 
## stopped after 100 iterations
## # weights:  771
## initial  value 6275.237206 
## iter  10 value 3856.578084
## iter  20 value 3816.503024
## iter  30 value 3684.670392
## iter  40 value 3634.655529
## iter  50 value 3596.094055
## iter  60 value 3557.547768
## iter  70 value 3554.934558
## iter  80 value 3547.427395
## iter  90 value 3529.601415
## iter 100 value 3497.729066
## final  value 3497.729066 
## stopped after 100 iterations
## # weights:  221
## initial  value 4747.737032 
## iter  10 value 3796.723768
## iter  20 value 3614.591542
## iter  30 value 3555.554245
## iter  40 value 3523.284586
## iter  50 value 3404.457291
## iter  60 value 3186.895069
## iter  70 value 2910.772053
## iter  80 value 2777.132646
## iter  90 value 2698.440009
## iter 100 value 2535.889528
## final  value 2535.889528 
## stopped after 100 iterations
## # weights:  331
## initial  value 7482.730340 
## iter  10 value 3930.317146
## iter  20 value 3703.098307
## iter  30 value 3682.751799
## iter  40 value 3672.177846
## iter  50 value 3661.633352
## iter  60 value 3546.369946
## iter  70 value 3032.872206
## iter  80 value 2818.943597
## iter  90 value 2770.073339
## iter 100 value 2695.789482
## final  value 2695.789482 
## stopped after 100 iterations
## # weights:  551
## initial  value 3944.363193 
## iter  10 value 3792.996170
## iter  20 value 3677.691785
## iter  30 value 3587.058376
## iter  40 value 3554.569309
## iter  50 value 3548.598752
## iter  60 value 3547.317841
## iter  70 value 3545.959171
## iter  80 value 3544.573915
## iter  90 value 3543.019260
## iter 100 value 3509.895853
## final  value 3509.895853 
## stopped after 100 iterations
## # weights:  771
## initial  value 6531.770447 
## iter  10 value 3796.164166
## iter  20 value 3724.297570
## iter  20 value 3724.297552
## iter  30 value 3675.795383
## iter  40 value 3626.931992
## iter  50 value 3618.942999
## iter  60 value 3616.305509
## iter  70 value 3605.338685
## iter  80 value 3596.098675
## iter  90 value 3583.875461
## iter 100 value 3489.357348
## final  value 3489.357348 
## stopped after 100 iterations
## # weights:  221
## initial  value 4187.386163 
## iter  10 value 3718.167102
## iter  20 value 3596.663614
## iter  30 value 3527.192912
## iter  40 value 3438.785584
## iter  50 value 3138.091158
## iter  60 value 2743.575117
## iter  70 value 2639.850778
## iter  80 value 2630.817916
## iter  90 value 2628.909675
## iter 100 value 2623.750392
## final  value 2623.750392 
## stopped after 100 iterations
## # weights:  331
## initial  value 9739.689265 
## iter  10 value 3803.338926
## iter  20 value 3756.299153
## iter  30 value 3635.902795
## iter  40 value 3634.344319
## iter  50 value 3593.593402
## iter  60 value 3154.173843
## iter  70 value 3066.967915
## iter  80 value 2952.191504
## iter  90 value 2730.607210
## iter 100 value 2599.468730
## final  value 2599.468730 
## stopped after 100 iterations
## # weights:  551
## initial  value 5251.904838 
## iter  10 value 3934.997583
## iter  20 value 3703.592295
## iter  30 value 3694.649612
## iter  40 value 3581.029540
## iter  50 value 3569.168768
## iter  60 value 3558.544011
## iter  70 value 3540.202740
## iter  80 value 3521.625668
## iter  90 value 3335.099245
## iter 100 value 2973.057238
## final  value 2973.057238 
## stopped after 100 iterations
## # weights:  771
## initial  value 6281.345748 
## iter  10 value 3748.584218
## iter  20 value 3639.374557
## iter  30 value 3555.290805
## iter  40 value 3537.312008
## iter  50 value 3494.934771
## iter  60 value 3118.754078
## iter  70 value 2901.872881
## iter  80 value 2861.873013
## iter  90 value 2706.044796
## iter 100 value 2595.700851
## final  value 2595.700851 
## stopped after 100 iterations
## # weights:  221
## initial  value 4901.329450 
## iter  10 value 3934.572498
## iter  20 value 3715.299253
## iter  30 value 3692.798794
## iter  40 value 3681.359874
## iter  50 value 3676.593144
## iter  60 value 3667.529269
## iter  70 value 3640.537480
## iter  80 value 3595.979354
## iter  90 value 3401.827402
## iter 100 value 3200.570050
## final  value 3200.570050 
## stopped after 100 iterations
## # weights:  331
## initial  value 7197.307374 
## iter  10 value 3875.718936
## iter  20 value 3870.295303
## iter  30 value 3711.787325
## iter  40 value 3710.509278
## iter  50 value 3710.388317
## iter  60 value 3704.796874
## iter  70 value 3702.938082
## iter  80 value 3594.470913
## iter  90 value 3510.090813
## iter 100 value 3323.086581
## final  value 3323.086581 
## stopped after 100 iterations
## # weights:  551
## initial  value 4285.060878 
## iter  10 value 3889.950922
## iter  20 value 3710.116519
## iter  30 value 3583.791093
## iter  40 value 3269.246460
## iter  50 value 2961.879180
## iter  60 value 2792.521613
## iter  70 value 2717.991068
## iter  80 value 2622.066945
## iter  90 value 2583.619565
## iter 100 value 2559.095879
## final  value 2559.095879 
## stopped after 100 iterations
## # weights:  771
## initial  value 7334.765810 
## iter  10 value 3940.781326
## iter  20 value 3906.811389
## iter  30 value 3714.285954
## iter  40 value 3704.454605
## final  value 3702.160431 
## converged
## # weights:  221
## initial  value 4888.829580 
## iter  10 value 3935.004455
## iter  20 value 3747.100573
## iter  30 value 3676.089487
## iter  40 value 3670.204413
## iter  50 value 3585.278163
## iter  60 value 3344.717768
## iter  70 value 3033.554009
## iter  80 value 2708.656854
## iter  90 value 2615.054621
## iter 100 value 2613.304744
## final  value 2613.304744 
## stopped after 100 iterations
## # weights:  331
## initial  value 4334.085183 
## iter  10 value 3943.363355
## iter  20 value 3943.251919
## iter  30 value 3733.605517
## iter  40 value 3640.055783
## iter  50 value 3525.366513
## iter  60 value 3153.566695
## iter  70 value 2730.433017
## iter  80 value 2593.412565
## iter  90 value 2546.627491
## iter 100 value 2494.515146
## final  value 2494.515146 
## stopped after 100 iterations
## # weights:  551
## initial  value 8605.550174 
## iter  10 value 3855.809833
## iter  20 value 3635.990200
## iter  30 value 3590.120576
## iter  40 value 3537.764303
## iter  50 value 3491.939404
## iter  60 value 3263.062398
## iter  70 value 3124.816794
## iter  80 value 3012.774463
## iter  90 value 2837.086473
## iter 100 value 2668.592924
## final  value 2668.592924 
## stopped after 100 iterations
## # weights:  771
## initial  value 14862.213732 
## iter  10 value 3724.307711
## iter  20 value 3692.242479
## iter  30 value 3689.172530
## iter  40 value 3686.733627
## iter  50 value 3686.587806
## iter  60 value 3651.056916
## iter  70 value 3644.283172
## iter  80 value 3643.375248
## final  value 3643.374273 
## converged
## # weights:  221
## initial  value 6599.352407 
## iter  10 value 3827.780922
## iter  20 value 3685.908923
## iter  30 value 3680.862875
## iter  40 value 3666.646063
## iter  50 value 3598.215805
## iter  60 value 3554.635650
## iter  70 value 3524.623732
## iter  80 value 3514.933273
## iter  90 value 3499.888326
## iter 100 value 3426.358301
## final  value 3426.358301 
## stopped after 100 iterations
## # weights:  331
## initial  value 5013.495488 
## iter  10 value 3944.603978
## iter  20 value 3922.797896
## iter  30 value 3920.346524
## iter  40 value 3748.408722
## iter  50 value 3745.008182
## iter  60 value 3688.354261
## iter  70 value 3676.446389
## iter  80 value 3671.167426
## iter  90 value 3663.930693
## iter 100 value 3385.048993
## final  value 3385.048993 
## stopped after 100 iterations
## # weights:  551
## initial  value 8755.052937 
## iter  10 value 3937.316494
## iter  20 value 3928.565749
## iter  30 value 3835.937653
## iter  40 value 3559.578989
## iter  50 value 3488.036573
## iter  60 value 3139.132235
## iter  70 value 2895.771367
## iter  80 value 2853.149149
## iter  90 value 2828.651004
## iter 100 value 2813.266486
## final  value 2813.266486 
## stopped after 100 iterations
## # weights:  771
## initial  value 8075.865874 
## iter  10 value 3926.383328
## iter  20 value 3687.995998
## iter  30 value 3668.845386
## iter  40 value 3663.927199
## iter  50 value 3658.936728
## iter  60 value 3598.938339
## iter  70 value 3538.213337
## iter  80 value 3531.879292
## iter  90 value 3529.496735
## iter 100 value 3506.078179
## final  value 3506.078179 
## stopped after 100 iterations
## # weights:  221
## initial  value 5292.934413 
## iter  10 value 3924.273586
## iter  20 value 3667.346976
## iter  30 value 3587.559862
## iter  40 value 3535.379395
## iter  50 value 3523.302493
## iter  60 value 3507.874777
## iter  70 value 3472.370173
## iter  80 value 3363.510020
## iter  90 value 3284.228158
## iter 100 value 2950.794861
## final  value 2950.794861 
## stopped after 100 iterations
## # weights:  331
## initial  value 6162.505380 
## iter  10 value 3939.181455
## iter  20 value 3718.328940
## iter  30 value 3675.894666
## iter  40 value 3654.751896
## iter  50 value 3611.793269
## iter  60 value 3581.715644
## iter  70 value 3544.528792
## iter  80 value 3502.421027
## iter  90 value 3321.884975
## iter 100 value 3112.164768
## final  value 3112.164768 
## stopped after 100 iterations
## # weights:  551
## initial  value 5813.814791 
## iter  10 value 3950.276468
## iter  20 value 3936.166982
## iter  30 value 3811.582479
## iter  40 value 3676.817169
## iter  50 value 3508.630339
## iter  60 value 3291.397918
## iter  70 value 3112.810345
## iter  80 value 2973.828180
## iter  90 value 2755.055078
## iter 100 value 2607.110247
## final  value 2607.110247 
## stopped after 100 iterations
## # weights:  771
## initial  value 5703.573167 
## iter  10 value 4220.397008
## iter  20 value 3718.403852
## iter  30 value 3682.490049
## iter  40 value 3666.934976
## iter  50 value 3625.270492
## iter  60 value 3588.029677
## iter  70 value 3548.035582
## iter  80 value 3520.258507
## iter  90 value 3507.837967
## iter 100 value 3486.173834
## final  value 3486.173834 
## stopped after 100 iterations
## # weights:  331
## initial  value 8413.774862 
## iter  10 value 5905.959520
## iter  20 value 5614.470453
## iter  30 value 5571.650843
## iter  40 value 5536.440739
## iter  50 value 5359.220766
## iter  60 value 5321.698322
## iter  70 value 5281.724945
## iter  80 value 5242.088053
## iter  90 value 5144.087085
## iter 100 value 4904.167315
## final  value 4904.167315 
## stopped after 100 iterations
# Print the fitted caret object: resampled accuracy and kappa for each
# size/decay combination, plus the combination selected for the final model.
Adult_TDA_PC_5.40.5_n3_NN1Fit0
## Neural Network 
## 
## 11563 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7709, 7708, 7709 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8470968  0.4414374
##   2     0.5    0.8555725  0.4751267
##   2     0.7    0.8512483  0.4382265
##   3     0.3    0.8627520  0.5578521
##   3     0.5    0.8343827  0.3412189
##   3     0.7    0.8305804  0.3670681
##   5     0.3    0.8428640  0.4573135
##   5     0.5    0.8525477  0.5260171
##   5     0.7    0.8522859  0.4656912
##   7     0.3    0.8332623  0.3240830
##   7     0.5    0.8399178  0.3521745
##   7     0.7    0.8209806  0.2473099
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold accuracy and kappa of the selected tuning combination
Adult_TDA_PC_5.40.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8632590 0.5442643    Fold3
## 2 0.8612192 0.5703556    Fold2
## 3 0.8637779 0.5589364    Fold1
# Keep the accuracy column only, as a one-column data frame, picked by name
# rather than by position.
# NOTE(review): rows are listed Fold3, Fold2, Fold1; code that subtracts this
# from another model's resample accuracies pairs rows by position -- confirm
# the fold order (and the folds themselves) match.
ad_tda_pc_5.40.5_n3_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n3_NN1Fit0$resample["Accuracy"]

# Print the fitted network's weights
summary(Adult_TDA_PC_5.40.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -1.21    -0.05    -0.65    -1.49    -0.53     0.00     0.64    -0.01 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     2.74    -1.82    -0.09     0.00     2.65    -4.00    -2.40    -0.08 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##    -0.02    -0.88     0.49     1.06     0.27     2.44    -0.75     2.78 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##    -1.29    -0.29     0.44    -1.62    -0.13     1.16    -1.58     2.48 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.29     0.10    -1.45    -1.63    -0.65    -0.84    -0.16     0.91 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -1.17     1.22     2.42     1.00    -3.12     0.05    -1.05     1.25 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.93    -1.14     1.02     2.94     0.37    -3.30    -0.19    -2.37 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     1.34    -1.89    -0.76    -3.22     0.13     4.53    -3.42     2.22 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00    -0.04     0.02    -0.31     0.06    -1.37    -0.21 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.14     0.37    -0.04    -1.42    -0.24    -0.36    -0.09    -1.17 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.25     0.12     0.00     0.00    -0.64    -0.33     0.99     1.12 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.35    -0.26     0.02     1.76    -0.50    -0.71     0.34    -0.42 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.12    -1.69    -1.41    -0.22     0.07    -0.56     1.81    -0.52 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.68     1.31     2.48     1.21     0.12 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00    -0.02     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00    -0.01     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o 
##  0.20 -2.45  0.01  0.36
# Variable-importance plot of the 25 top-ranked predictors for the node-3
# model. The feature count is passed by name, and the title typo
# ("Assited") is corrected.
vip(Adult_TDA_PC_5.40.5_n3_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n3_NN1Fit TDA-Assisted NN")

# Score the test data with the node-3 TDA-assisted NN1 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print
# the confusion matrix with its summary statistics.
# NOTE(review): the printed test accuracy (0.50, kappa -0.14) is far below the
# cross-validated accuracy (0.86) -- confirm that the test data frame carries
# the same predictor columns and encoding as the training subset.
ad_tda_pc_5.40.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4316  1759
##      >50K    3100   593
##                                           
##                Accuracy : 0.5026          
##                  95% CI : (0.4926, 0.5125)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1389         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5820          
##             Specificity : 0.2521          
##          Pos Pred Value : 0.7105          
##          Neg Pred Value : 0.1606          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4419          
##    Detection Prevalence : 0.6219          
##       Balanced Accuracy : 0.4171          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion-matrix object; the output is identical
# to the print directly after the confusionMatrix() call.
ad_tda_pc_5.40.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4316  1759
##      >50K    3100   593
##                                           
##                Accuracy : 0.5026          
##                  95% CI : (0.4926, 0.5125)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1389         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5820          
##             Specificity : 0.2521          
##          Pos Pred Value : 0.7105          
##          Neg Pred Value : 0.1606          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4419          
##    Detection Prevalence : 0.6219          
##       Balanced Accuracy : 0.4171          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 test-set confusion matrix
ad_tda_pc_5.40.5_n3_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.025594e-01  -1.388585e-01   4.925933e-01   5.125239e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   2.355701e-82
# Overall accuracy, picked by name so the extraction does not depend on the
# element order of `$overall`.
ad_tda_pc_5.40.5_n3_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_nn1_cf0$overall["Accuracy"]
ad_tda_pc_5.40.5_n3_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.5819849            0.2521259            0.7104527 
##       Neg Pred Value            Precision               Recall 
##            0.1605741            0.7104527            0.5819849 
##                   F1           Prevalence       Detection Rate 
##            0.6398340            0.7592138            0.4418509 
## Detection Prevalence    Balanced Accuracy 
##            0.6219287            0.4170554
# Precision, recall and F1 (positions 5:7 in the print above), picked by name.
ad_tda_pc_5.40.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN1 vs. TDA-assisted NN1 classifiers

### 3-fold diff

# Per-fold accuracy gap: plain NN1 minus node-3 TDA-assisted NN1. Negative
# values favour the TDA-assisted model. The subtraction pairs rows by position.
# NOTE(review): confirm both fits list their folds in the same order.
diff_tda_pca_5.40.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n3_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n3_3_fold
##      Accuracy
## 1 -0.01331819
## 2 -0.06824736
## 3 -0.04672077
## Bayesian Tests 3-fold diff

# Bayesian sign test with the region of practical equivalence (ROPE)
# set to [-0.01, 0.01]

bst_tda_pca_5.40.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n3_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test
# (probRight is 0 here, so the ratio is Inf)

bst_tda_pca_5.40.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_nn1.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nn1.n3_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test, same ROPE

bsr_tda_pca_5.40.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n3_3_fold
## $winLeft
## [1] 0.9630667
## 
## $winRope
## [1] 0.03693333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test, same ROPE
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation; with 3-fold CV the usual heuristic is 1/3 -- confirm.

bct_tda_pca_5.40.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n3_3_fold
## $left
## [1] 0.8911024
## 
## $rope
## [1] 0.05707951
## 
## $right
## [1] 0.05181807
# ROPE plot of the fold differences
plot(rope(diff_tda_pca_5.40.5_nn1_n3_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold))
#bf_tda_pca_5.40.5_nn1.n3_3_fold

# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold)
## t = -2.676, df = 2, p-value = 0.1159
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.11151740  0.02599319
## sample estimates:
##   mean of x 
## -0.04276211
### Test set diff
# Test-set accuracy gap: plain NN1 minus node-3 TDA-assisted NN1.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n3_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n3_test
##  Accuracy 
## 0.2566544
## Bayesian Tests Test set diff

# Bayesian sign test, ROPE = [-0.01, 0.01]

bst_tda_pca_5.40.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.40.5_nn1.n3_test_odds.left <- with(
  bst_tda_pca_5.40.5_nn1.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test, same ROPE

bsr_tda_pca_5.40.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1626
## 
## $winRight
## [1] 0.8374
# Bayesian correlated t-test, same ROPE. The printed result is NA for all
# three regions.
# NOTE(review): presumably because a spread cannot be estimated from a single
# difference -- confirm, and consider dropping this call.

bct_tda_pca_5.40.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n3_test))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n3_test)) #bf_tda_pca_5.40.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n3_test))


##Node4

# Neural Network 1
# Fit a caret `nnet` classifier for adult_df1 on the node-4 data
# (tda.m_adult_5.40.5.n4.vec), tuning over nn1Grid with the resampling scheme
# in fitControl and selecting the model by accuracy.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitter; nnet does not appear to define such an argument, so it is probably
# ignored -- confirm before relying on it.
Adult_TDA_PC_5.40.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  Importance = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 8284.101886 
## iter  10 value 1741.933557
## iter  20 value 1741.831713
## iter  30 value 1741.770934
## iter  40 value 1741.455747
## iter  50 value 1741.415556
## iter  60 value 1632.821214
## iter  70 value 1573.412172
## iter  80 value 1521.550168
## iter  90 value 1507.379563
## iter 100 value 1497.034421
## final  value 1497.034421 
## stopped after 100 iterations
## # weights:  331
## initial  value 13379.405334 
## iter  10 value 1745.024175
## iter  20 value 1743.367144
## iter  30 value 1743.344910
## iter  30 value 1743.344898
## iter  40 value 1641.981735
## iter  50 value 1599.739199
## iter  60 value 1596.743822
## iter  70 value 1586.703835
## iter  80 value 1534.175627
## iter  90 value 1513.221771
## iter 100 value 1502.225402
## final  value 1502.225402 
## stopped after 100 iterations
## # weights:  551
## initial  value 13037.268847 
## iter  10 value 1798.909989
## iter  20 value 1532.142558
## iter  30 value 1515.936116
## iter  40 value 1502.979483
## iter  50 value 1494.317317
## iter  60 value 1479.584409
## iter  70 value 1443.432370
## iter  80 value 1357.942722
## iter  90 value 1334.371734
## iter 100 value 1309.199627
## final  value 1309.199627 
## stopped after 100 iterations
## # weights:  771
## initial  value 8717.667487 
## iter  10 value 1831.675739
## iter  20 value 1744.362147
## iter  30 value 1737.535963
## iter  40 value 1707.653412
## iter  50 value 1664.337423
## iter  60 value 1535.088197
## iter  70 value 1521.696727
## iter  80 value 1518.405469
## iter  90 value 1515.852314
## iter 100 value 1497.583281
## final  value 1497.583281 
## stopped after 100 iterations
## # weights:  221
## initial  value 9054.931299 
## iter  10 value 1743.305023
## iter  20 value 1742.802888
## iter  30 value 1709.039006
## iter  40 value 1603.048099
## iter  50 value 1597.357999
## iter  60 value 1591.221960
## iter  70 value 1586.795105
## iter  80 value 1558.891198
## iter  90 value 1523.562225
## iter 100 value 1502.719165
## final  value 1502.719165 
## stopped after 100 iterations
## # weights:  331
## initial  value 4162.877348 
## iter  10 value 1743.796344
## iter  20 value 1742.832176
## iter  30 value 1742.797385
## iter  40 value 1742.038304
## iter  50 value 1741.993324
## final  value 1741.991655 
## converged
## # weights:  551
## initial  value 4797.235452 
## iter  10 value 1616.808905
## iter  20 value 1594.029675
## iter  30 value 1593.599211
## iter  40 value 1592.610226
## iter  50 value 1589.006188
## iter  60 value 1585.300670
## iter  70 value 1581.193765
## iter  80 value 1544.250539
## iter  90 value 1480.872916
## iter 100 value 1401.340464
## final  value 1401.340464 
## stopped after 100 iterations
## # weights:  771
## initial  value 10450.582586 
## iter  10 value 1594.271255
## iter  20 value 1591.275380
## final  value 1591.271172 
## converged
## # weights:  221
## initial  value 9118.992269 
## iter  10 value 1750.483612
## iter  20 value 1745.681374
## iter  30 value 1741.836709
## iter  40 value 1740.226266
## iter  50 value 1615.072671
## iter  60 value 1569.864420
## iter  70 value 1540.148893
## iter  80 value 1527.588242
## iter  90 value 1523.528807
## iter 100 value 1501.310270
## final  value 1501.310270 
## stopped after 100 iterations
## # weights:  331
## initial  value 5486.219496 
## iter  10 value 1745.720722
## iter  20 value 1719.107421
## iter  30 value 1623.654534
## iter  40 value 1581.167787
## iter  50 value 1559.446996
## iter  60 value 1542.004760
## iter  70 value 1523.288867
## iter  80 value 1519.535383
## iter  90 value 1510.013262
## iter 100 value 1509.154054
## final  value 1509.154054 
## stopped after 100 iterations
## # weights:  551
## initial  value 7152.124017 
## iter  10 value 1839.847387
## iter  20 value 1767.138235
## iter  30 value 1696.407457
## iter  40 value 1624.869630
## iter  50 value 1573.944708
## iter  60 value 1465.805484
## iter  70 value 1400.876133
## iter  80 value 1385.108418
## iter  90 value 1377.278088
## iter 100 value 1313.188595
## final  value 1313.188595 
## stopped after 100 iterations
## # weights:  771
## initial  value 4362.922168 
## iter  10 value 1740.394404
## iter  20 value 1737.771328
## iter  30 value 1737.046480
## iter  40 value 1598.133109
## iter  50 value 1567.022739
## iter  60 value 1563.985768
## iter  70 value 1559.555313
## iter  80 value 1554.446323
## iter  90 value 1532.794400
## iter 100 value 1517.823311
## final  value 1517.823311 
## stopped after 100 iterations
## # weights:  221
## initial  value 4612.921893 
## iter  10 value 1743.619727
## iter  20 value 1693.822740
## iter  30 value 1590.430508
## iter  40 value 1588.957604
## iter  50 value 1580.699887
## iter  60 value 1579.734944
## iter  70 value 1555.211776
## iter  80 value 1539.185950
## iter  90 value 1490.299331
## iter 100 value 1481.813697
## final  value 1481.813697 
## stopped after 100 iterations
## # weights:  331
## initial  value 11339.736302 
## iter  10 value 1745.017890
## iter  20 value 1743.205043
## iter  30 value 1741.174306
## iter  40 value 1739.939820
## iter  50 value 1595.412208
## iter  60 value 1547.878534
## iter  70 value 1505.282061
## iter  80 value 1491.892752
## iter  90 value 1490.636252
## iter 100 value 1489.222173
## final  value 1489.222173 
## stopped after 100 iterations
## # weights:  551
## initial  value 2477.254837 
## iter  10 value 1741.812049
## iter  20 value 1741.352319
## iter  30 value 1741.346448
## iter  40 value 1731.787572
## iter  50 value 1731.164170
## iter  60 value 1709.630484
## iter  70 value 1680.738732
## iter  80 value 1636.099611
## iter  90 value 1635.311278
## iter 100 value 1605.864357
## final  value 1605.864357 
## stopped after 100 iterations
## # weights:  771
## initial  value 8345.656368 
## iter  10 value 1740.085251
## iter  20 value 1628.264254
## iter  30 value 1576.758074
## iter  40 value 1574.118106
## iter  50 value 1569.291948
## iter  60 value 1564.737992
## iter  70 value 1560.616325
## iter  80 value 1557.585014
## iter  90 value 1552.976149
## iter 100 value 1495.504399
## final  value 1495.504399 
## stopped after 100 iterations
## # weights:  221
## initial  value 9154.924163 
## iter  10 value 1743.333673
## iter  20 value 1742.803236
## iter  30 value 1673.033493
## iter  40 value 1582.521435
## iter  50 value 1497.519470
## iter  60 value 1401.100164
## iter  70 value 1389.837669
## iter  80 value 1371.769042
## iter  90 value 1324.529946
## iter 100 value 1309.357913
## final  value 1309.357913 
## stopped after 100 iterations
## # weights:  331
## initial  value 5030.981513 
## iter  10 value 1782.476408
## iter  20 value 1751.233309
## iter  30 value 1750.838842
## iter  40 value 1742.294436
## iter  50 value 1741.989461
## iter  60 value 1690.051486
## iter  70 value 1627.264010
## iter  80 value 1627.026351
## iter  90 value 1586.278796
## iter 100 value 1572.625017
## final  value 1572.625017 
## stopped after 100 iterations
## # weights:  551
## initial  value 6623.695013 
## iter  10 value 1751.172458
## iter  20 value 1668.168948
## iter  30 value 1666.674552
## iter  40 value 1661.707725
## iter  50 value 1575.060073
## iter  60 value 1374.325080
## iter  70 value 1306.011868
## iter  80 value 1288.024954
## iter  90 value 1266.081983
## iter 100 value 1257.870299
## final  value 1257.870299 
## stopped after 100 iterations
## # weights:  771
## initial  value 11401.433057 
## iter  10 value 1752.279968
## iter  20 value 1747.518099
## iter  30 value 1604.844103
## iter  40 value 1594.431752
## iter  50 value 1594.362582
## iter  60 value 1592.749351
## iter  70 value 1581.194503
## iter  80 value 1574.547686
## iter  90 value 1531.227621
## iter 100 value 1491.816327
## final  value 1491.816327 
## stopped after 100 iterations
## # weights:  221
## initial  value 10036.893580 
## iter  10 value 1744.252026
## iter  20 value 1743.492810
## iter  30 value 1687.271721
## iter  40 value 1582.637565
## iter  50 value 1560.279298
## iter  60 value 1540.952890
## iter  70 value 1514.406410
## iter  80 value 1500.391225
## iter  90 value 1493.698333
## iter 100 value 1492.582628
## final  value 1492.582628 
## stopped after 100 iterations
## # weights:  331
## initial  value 6472.182982 
## iter  10 value 1741.130968
## iter  20 value 1601.053501
## iter  30 value 1594.141312
## iter  40 value 1531.007705
## iter  50 value 1499.946695
## iter  60 value 1496.090398
## iter  70 value 1495.390296
## iter  80 value 1492.377135
## iter  90 value 1491.522048
## iter 100 value 1487.120362
## final  value 1487.120362 
## stopped after 100 iterations
## # weights:  551
## initial  value 5291.608129 
## iter  10 value 1749.815956
## iter  20 value 1747.562215
## iter  30 value 1675.069743
## iter  40 value 1595.672765
## iter  50 value 1586.176473
## iter  60 value 1581.554308
## iter  70 value 1581.314965
## iter  80 value 1580.520932
## iter  90 value 1560.626357
## iter 100 value 1534.260589
## final  value 1534.260589 
## stopped after 100 iterations
## # weights:  771
## initial  value 3985.214550 
## iter  10 value 1890.432999
## iter  20 value 1621.315108
## iter  30 value 1568.597547
## iter  40 value 1541.497793
## iter  50 value 1517.228594
## iter  60 value 1503.090912
## iter  70 value 1496.934151
## iter  80 value 1491.235218
## iter  90 value 1481.630209
## iter 100 value 1478.008237
## final  value 1478.008237 
## stopped after 100 iterations
## # weights:  221
## initial  value 7808.437522 
## iter  10 value 1738.839016
## iter  20 value 1738.676705
## iter  30 value 1738.241798
## final  value 1738.191681 
## converged
## # weights:  331
## initial  value 9394.854498 
## iter  10 value 1799.249914
## iter  20 value 1716.193968
## iter  30 value 1666.302170
## iter  40 value 1587.927955
## iter  50 value 1576.822746
## iter  60 value 1574.807782
## iter  70 value 1564.456941
## iter  80 value 1540.626097
## iter  90 value 1522.936511
## iter 100 value 1501.429072
## final  value 1501.429072 
## stopped after 100 iterations
## # weights:  551
## initial  value 8498.572185 
## iter  10 value 1691.944135
## iter  20 value 1675.351576
## iter  30 value 1669.294073
## iter  40 value 1568.756260
## iter  50 value 1508.466170
## iter  60 value 1482.613727
## iter  70 value 1426.176183
## iter  80 value 1418.399263
## iter  90 value 1412.219577
## iter 100 value 1370.994191
## final  value 1370.994191 
## stopped after 100 iterations
## # weights:  771
## initial  value 10247.017237 
## iter  10 value 1780.291399
## iter  20 value 1733.120530
## iter  30 value 1731.115171
## iter  40 value 1720.118606
## iter  50 value 1592.441005
## iter  60 value 1586.893544
## iter  70 value 1570.678190
## iter  80 value 1561.287248
## iter  90 value 1559.153902
## iter 100 value 1551.525084
## final  value 1551.525084 
## stopped after 100 iterations
## # weights:  221
## initial  value 3677.363633 
## iter  10 value 1742.704296
## iter  20 value 1740.761867
## iter  30 value 1697.214304
## iter  40 value 1632.665133
## iter  50 value 1585.431294
## iter  60 value 1553.667822
## iter  70 value 1545.715582
## iter  80 value 1535.227481
## iter  90 value 1520.997464
## iter 100 value 1508.979654
## final  value 1508.979654 
## stopped after 100 iterations
## # weights:  331
## initial  value 10076.792490 
## iter  10 value 1721.780765
## iter  20 value 1598.778928
## iter  30 value 1591.327918
## iter  40 value 1584.172916
## iter  50 value 1541.060349
## iter  60 value 1515.943465
## iter  70 value 1496.156592
## iter  80 value 1470.354600
## iter  90 value 1406.295853
## iter 100 value 1376.000209
## final  value 1376.000209 
## stopped after 100 iterations
## # weights:  551
## initial  value 4184.364993 
## iter  10 value 1595.337878
## iter  20 value 1584.370743
## iter  30 value 1571.663154
## iter  40 value 1557.143658
## iter  50 value 1532.705738
## iter  60 value 1523.178189
## iter  70 value 1512.438397
## iter  80 value 1509.035301
## iter  90 value 1505.492535
## iter 100 value 1497.190947
## final  value 1497.190947 
## stopped after 100 iterations
## # weights:  771
## initial  value 7810.693106 
## iter  10 value 1732.074709
## iter  20 value 1732.010288
## iter  30 value 1730.453793
## iter  40 value 1599.978031
## iter  50 value 1587.865276
## iter  60 value 1575.100196
## iter  70 value 1560.762449
## iter  80 value 1537.460961
## iter  90 value 1527.636086
## iter 100 value 1519.834041
## final  value 1519.834041 
## stopped after 100 iterations
## # weights:  221
## initial  value 4251.718866 
## iter  10 value 1762.023532
## iter  20 value 1758.511532
## iter  30 value 1744.269881
## iter  40 value 1634.068123
## iter  50 value 1599.777921
## iter  60 value 1590.670793
## iter  70 value 1586.545395
## iter  80 value 1586.470026
## iter  90 value 1585.796523
## final  value 1585.788769 
## converged
## # weights:  331
## initial  value 7540.548839 
## iter  10 value 1741.790699
## iter  20 value 1740.625390
## iter  30 value 1739.531270
## iter  40 value 1592.214287
## iter  50 value 1586.218753
## iter  60 value 1572.864181
## iter  70 value 1551.621617
## iter  80 value 1536.710594
## iter  90 value 1530.589941
## iter 100 value 1525.326081
## final  value 1525.326081 
## stopped after 100 iterations
## # weights:  551
## initial  value 10538.882511 
## iter  10 value 1627.366847
## iter  20 value 1625.245445
## iter  30 value 1625.161074
## iter  40 value 1624.451352
## iter  50 value 1605.462197
## iter  60 value 1592.175446
## iter  70 value 1584.336227
## iter  80 value 1580.576468
## iter  90 value 1557.907941
## iter 100 value 1530.839596
## final  value 1530.839596 
## stopped after 100 iterations
## # weights:  771
## initial  value 5491.561329 
## iter  10 value 1816.634243
## iter  20 value 1740.790690
## iter  30 value 1732.852738
## iter  40 value 1645.533444
## iter  50 value 1605.279358
## iter  60 value 1588.940729
## iter  70 value 1578.320640
## iter  80 value 1572.303473
## iter  90 value 1568.090418
## iter 100 value 1539.120435
## final  value 1539.120435 
## stopped after 100 iterations
## # weights:  221
## initial  value 7750.497002 
## iter  10 value 2471.012209
## iter  20 value 2351.893420
## iter  30 value 2275.001691
## iter  40 value 2233.081733
## iter  50 value 2062.292020
## iter  60 value 1994.110129
## iter  70 value 1970.597993
## iter  80 value 1932.612818
## iter  90 value 1909.993385
## iter 100 value 1884.881062
## final  value 1884.881062 
## stopped after 100 iterations
# Print the caret training summary: cross-validated accuracy and kappa for each
# size/decay combination, and the combination that was selected.
Adult_TDA_PC_5.40.5_n4_NN1Fit0
## Neural Network 
## 
## 14818 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9879, 9879, 9878 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.9611961  0.1818766
##   2     0.5    0.9634903  0.2988412
##   2     0.7    0.9609261  0.1801302
##   3     0.3    0.9630854  0.2870739
##   3     0.5    0.9589687  0.1754547
##   3     0.7    0.9633554  0.2862024
##   5     0.3    0.9606557  0.1812243
##   5     0.5    0.9628154  0.2844526
##   5     0.7    0.9617358  0.2487758
##   7     0.3    0.9629505  0.2716034
##   7     0.5    0.9618030  0.1989344
##   7     0.7    0.9630179  0.2788320
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold accuracy and kappa of the selected node-4 model.
Adult_TDA_PC_5.40.5_n4_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.9643725 0.3264489    Fold3
## 2 0.9627455 0.2906926    Fold2
## 3 0.9633529 0.2793820    Fold1
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name instead of by position so the extraction does not depend on
# the column order of `$resample`.
ad_tda_pc_5.40.5_n4_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n4_NN1Fit0$resample["Accuracy"]

# Show the final network's architecture, fitting options and fitted weights.
summary(Adult_TDA_PC_5.40.5_n4_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -1.71     0.03     0.07     0.53    -0.48    -0.05     0.27    -0.91 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.42    -0.46    -0.26     0.00    -1.28     0.48    -0.28    -0.51 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##    -0.52    -0.52     0.98    -0.32    -0.21     0.25     0.53    -0.46 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.29    -0.24     0.01     0.09     0.03    -0.46     0.49     0.60 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.67    -0.58    -0.75    -0.33     0.02    -0.34    -0.09     0.02 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.57    -1.38    -0.11    -0.78    -0.10    -0.98     0.53     0.35 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.21     0.20     0.18    -1.81     0.15    -0.79    -0.68     0.11 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     1.32     0.05     0.19    -0.46    -0.69    -0.79    -0.35    -1.36 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.05     0.00     0.04     0.07    -0.07     0.24    -0.28    -0.31 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##    -0.53    -0.29    -0.11    -0.42     0.40    -0.31    -0.06     0.29 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.69    -0.45     0.00    -0.15    -0.21     0.21     0.03    -0.32 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.31     0.27    -0.04     0.81     0.10    -0.73    -0.17    -0.33 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.22    -0.18    -0.08     0.66     0.06    -0.19    -0.06    -0.04 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.18    -0.15     0.14    -0.22     0.11 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     1.20     0.01    -0.18     0.22    -0.22     0.08    -0.12     0.48 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.03     0.59     0.31     0.00     0.44     0.55     0.13     0.22 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##    -0.39     0.35     0.50    -0.04     0.05    -0.15     0.01    -0.27 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.35     0.39     0.10    -0.33     0.06     0.21    -0.63    -0.45 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.39     0.33     0.30     1.04    -0.10    -1.03     0.01     0.26 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.48     0.35     0.23     0.24     0.09     0.52     0.10    -0.70 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.36    -0.46     0.84     1.05     0.11    -0.30     0.25     0.22 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.14     0.65     0.29     0.03     0.01     0.22     0.48     0.72 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.02    -0.09     0.34    -0.43    -0.62     0.11 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.33     0.21     0.05     0.23     0.33    -0.39    -0.11     0.13 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##    -0.59     0.26     0.00     0.12     0.17     0.19    -0.15     0.11 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00    -0.03     0.16    -0.38     0.15     0.32     0.07     0.08 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.10     0.27     0.13     0.54    -0.27     0.11     0.31    -0.26 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.05     0.10     0.26     0.15    -0.22 
##  b->o h1->o h2->o 
##  1.81  4.19 -7.59
# Variable-importance plot for the node-4 model, limited to 25 features.
# The feature count is passed by name, and the title typo ("Assited") is fixed.
vip(Adult_TDA_PC_5.40.5_n4_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n4_NN1Fit TDA-Assisted NN")

# Score the held-out test set with the node-4 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Compare the predictions with the true test labels and print the resulting
# confusion matrix and summary statistics.
ad_tda_pc_5.40.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7402  2059
##      >50K      14   293
##                                           
##                Accuracy : 0.7878          
##                  95% CI : (0.7795, 0.7958)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.17e-11        
##                                           
##                   Kappa : 0.1745          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9981          
##             Specificity : 0.1246          
##          Pos Pred Value : 0.7824          
##          Neg Pred Value : 0.9544          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7578          
##    Detection Prevalence : 0.9686          
##       Balanced Accuracy : 0.5613          
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the node-4 confusion matrix again (same object as printed above).
ad_tda_pc_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7402  2059
##      >50K      14   293
##                                           
##                Accuracy : 0.7878          
##                  95% CI : (0.7795, 0.7958)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.17e-11        
##                                           
##                   Kappa : 0.1745          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9981          
##             Specificity : 0.1246          
##          Pos Pred Value : 0.7824          
##          Neg Pred Value : 0.9544          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7578          
##    Detection Prevalence : 0.9686          
##       Balanced Accuracy : 0.5613          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_pc_5.40.5_n4_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.877764e-01   1.744842e-01   7.795315e-01   7.958494e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.169888e-11   0.000000e+00
# Store the overall test accuracy as a named length-1 vector. It is selected
# by name rather than by position so the extraction does not depend on the
# ordering of `$overall`.
ad_tda_pc_5.40.5_n4_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_nn1_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.40.5_n4_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9981122            0.1245748            0.7823697 
##       Neg Pred Value            Precision               Recall 
##            0.9543974            0.7823697            0.9981122 
##                   F1           Prevalence       Detection Rate 
##            0.8771701            0.7592138            0.7577805 
## Detection Prevalence    Balanced Accuracy 
##            0.9685708            0.5613435
# Keep precision, recall and F1 (positions 5 to 7 of `$byClass`), selected by
# name so the result does not depend on the ordering of the statistics.
ad_tda_pc_5.40.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN1 vs. TDA-assisted NN1 classifiers

### 3-fold diff
# Row-wise difference between the fold accuracies in ad_nn1_fit_re and those of
# the node-4 TDA-assisted NN1; negative values mean the node-4 model scored
# higher on that row.
# NOTE(review): rows are paired by position, and the node-4 `$resample` rows
# are ordered Fold3, Fold2, Fold1 -- confirm ad_nn1_fit_re uses the same order.
diff_tda_pca_5.40.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n4_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n4_3_fold
##     Accuracy
## 1 -0.1144317
## 2 -0.1697737
## 3 -0.1462958
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three per-fold differences, called with -0.01 and
# 0.01 (presumably the ROPE bounds -- confirm against BayesianSignTest).
bst_tda_pca_5.40.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight). The ratio is Inf when probRight is 0 with
# probLeft > 0, as happens here.
bst_tda_pca_5.40.5_nn1.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the three per-fold differences, using the same
# -0.01 / 0.01 arguments as the sign test.
bsr_tda_pca_5.40.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9905667
## 
## $winRope
## [1] 0.009433333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the three per-fold differences; 0.1 is
# presumably the correlation (rho) argument and -0.01 / 0.01 the ROPE bounds
# -- confirm against correlatedBayesianTtest's definition.
bct_tda_pca_5.40.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n4_3_fold
## $left
## [1] 0.9906489
## 
## $rope
## [1] 0.002229626
## 
## $right
## [1] 0.00712152
# ROPE plot for the node-4 3-fold accuracy differences, using the interval
# [-0.01, 0.01] as the region of practical equivalence.
plot(rope(diff_tda_pca_5.40.5_nn1_n4_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold))
#bf_tda_pca_5.40.5_nn1.n4_3_fold

# One-sample, two-sided t-test of the per-fold accuracy differences against a
# true mean of 0. Only three folds are available, so the test has 2 degrees
# of freedom.
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold)
## t = -8.9482, df = 2, p-value = 0.01226
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.21250147 -0.07449929
## sample estimates:
##  mean of x 
## -0.1435004
### Test set diff
# Test-set accuracy in nn1_cf_ov_acc (presumably the non-TDA NN1 baseline)
# minus the node-4 TDA-assisted NN1 test accuracy. The result is a single
# value, negative when the node-4 model scores higher.
diff_tda_pca_5.40.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n4_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n4_test
##    Accuracy 
## -0.02856265
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference, called with -0.01 and 0.01
# (presumably the ROPE bounds -- confirm against BayesianSignTest's definition).
# NOTE(review): the input holds a single difference value.
bst_tda_pca_5.40.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n4_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight). The ratio is Inf when probRight is 0 with
# probLeft > 0, as happens here.
bst_tda_pca_5.40.5_nn1.n4_test_odds.left <-
  bst_tda_pca_5.40.5_nn1.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n4_test[["probRight"]]
bst_tda_pca_5.40.5_nn1.n4_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, using the same
# -0.01 / 0.01 arguments as the sign test.
# NOTE(review): the input holds a single difference value.
bsr_tda_pca_5.40.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n4_test
## $winLeft
## [1] 0.8434
## 
## $winRope
## [1] 0.1566
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference; 0.1 is presumably the
# correlation (rho) argument and -0.01 / 0.01 the ROPE bounds -- confirm.
# NOTE(review): with a single difference value all three results print as NA.
bct_tda_pca_5.40.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n4_test)))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n4_test)) #bf_tda_pca_5.40.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n4_test))


##Node5

# Neural Network 1
# Fit a caret `nnet` classifier for adult_df1 on the node-5 data
# (tda.m_adult_5.40.5.n5.vec), tuning over nn1Grid with the resampling scheme
# in fitControl and selecting the model by accuracy.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitter; nnet does not appear to define such an argument, so it is probably
# ignored -- confirm before relying on it.
Adult_TDA_PC_5.40.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  Importance = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 7942.926868 
## iter  10 value 355.117327
## iter  20 value 36.043137
## iter  30 value 33.066816
## iter  40 value 32.587345
## iter  50 value 32.579371
## iter  60 value 32.578074
## final  value 32.578065 
## converged
## # weights:  331
## initial  value 3605.249233 
## iter  10 value 343.463170
## iter  20 value 32.111043
## iter  30 value 31.160648
## iter  40 value 31.160194
## iter  50 value 31.160156
## iter  50 value 31.160156
## iter  50 value 31.160156
## final  value 31.160156 
## converged
## # weights:  551
## initial  value 6692.291375 
## iter  10 value 899.451147
## iter  20 value 54.019412
## iter  30 value 31.657210
## iter  40 value 29.777412
## iter  50 value 29.708946
## iter  60 value 29.708782
## final  value 29.708695 
## converged
## # weights:  771
## initial  value 6928.993355 
## iter  10 value 1068.627082
## iter  20 value 38.724245
## iter  30 value 32.636589
## iter  40 value 29.171255
## iter  50 value 29.101341
## iter  60 value 29.016079
## iter  70 value 28.970190
## iter  80 value 28.969050
## iter  90 value 28.969002
## final  value 28.968998 
## converged
## # weights:  221
## initial  value 4956.590971 
## final  value 1412.873004 
## converged
## # weights:  331
## initial  value 7254.598730 
## final  value 2684.504424 
## converged
## # weights:  551
## initial  value 8151.370167 
## final  value 2991.995328 
## converged
## # weights:  771
## initial  value 6946.274004 
## final  value 3909.231087 
## converged
## # weights:  221
## initial  value 6320.824909 
## iter  10 value 127.326555
## iter  20 value 39.697804
## final  value 39.697370 
## converged
## # weights:  331
## initial  value 7979.306999 
## iter  10 value 64.470103
## iter  20 value 39.487941
## iter  30 value 38.023631
## iter  40 value 37.099045
## iter  50 value 36.669338
## iter  60 value 36.663511
## final  value 36.663510 
## converged
## # weights:  551
## initial  value 10433.701644 
## iter  10 value 133.873403
## iter  20 value 34.786422
## iter  30 value 33.724048
## iter  40 value 33.506576
## final  value 33.506522 
## converged
## # weights:  771
## initial  value 1164.654561 
## iter  10 value 142.822485
## iter  20 value 34.922391
## iter  30 value 32.737740
## iter  40 value 32.115504
## iter  50 value 31.895831
## iter  60 value 31.877317
## iter  70 value 31.877105
## iter  80 value 31.874628
## iter  90 value 31.873795
## iter 100 value 31.873663
## final  value 31.873663 
## stopped after 100 iterations
## # weights:  221
## initial  value 4134.356255 
## iter  10 value 178.171839
## iter  20 value 31.048972
## iter  30 value 25.096713
## iter  40 value 24.973362
## final  value 24.973347 
## converged
## # weights:  331
## initial  value 8139.144329 
## iter  10 value 378.670959
## iter  20 value 25.153574
## iter  30 value 24.180283
## iter  40 value 23.564058
## iter  50 value 22.050754
## iter  60 value 21.148013
## iter  70 value 20.591139
## iter  80 value 20.585668
## iter  90 value 20.585100
## iter 100 value 20.584882
## final  value 20.584882 
## stopped after 100 iterations
## # weights:  551
## initial  value 9911.828319 
## iter  10 value 564.675646
## iter  20 value 26.173246
## iter  30 value 25.589933
## iter  40 value 21.131439
## iter  50 value 20.599504
## iter  60 value 20.474818
## iter  70 value 20.295861
## iter  80 value 20.271395
## iter  90 value 20.270515
## iter 100 value 20.270457
## final  value 20.270457 
## stopped after 100 iterations
## # weights:  771
## initial  value 6011.031140 
## iter  10 value 691.052667
## iter  20 value 25.154534
## iter  30 value 21.914138
## iter  40 value 21.766650
## iter  50 value 21.143598
## iter  60 value 21.123870
## iter  70 value 21.106468
## iter  80 value 21.099230
## iter  90 value 21.099196
## iter 100 value 21.099185
## final  value 21.099185 
## stopped after 100 iterations
## # weights:  221
## initial  value 3413.165267 
## iter  10 value 34.535856
## iter  20 value 33.362393
## iter  30 value 29.527218
## iter  40 value 28.835666
## iter  50 value 28.827513
## iter  50 value 28.827513
## iter  50 value 28.827513
## final  value 28.827513 
## converged
## # weights:  331
## initial  value 5399.193410 
## final  value 1335.770475 
## converged
## # weights:  551
## initial  value 14582.095870 
## final  value 7405.284825 
## converged
## # weights:  771
## initial  value 1462.064049 
## iter  10 value 31.523676
## iter  20 value 29.542944
## iter  30 value 29.519660
## iter  40 value 29.504048
## iter  50 value 29.497763
## iter  60 value 28.282892
## iter  70 value 25.817785
## iter  80 value 22.807741
## iter  90 value 22.704018
## iter 100 value 22.691657
## final  value 22.691657 
## stopped after 100 iterations
## # weights:  221
## initial  value 2187.176096 
## iter  10 value 123.346754
## iter  20 value 38.528755
## iter  30 value 38.477969
## iter  40 value 38.427214
## iter  50 value 34.522669
## iter  60 value 32.496792
## iter  70 value 32.472854
## iter  80 value 32.472559
## iter  80 value 32.472559
## iter  90 value 32.472536
## iter  90 value 32.472536
## iter  90 value 32.472536
## final  value 32.472536 
## converged
## # weights:  331
## initial  value 3194.765393 
## iter  10 value 50.154477
## iter  20 value 38.603315
## iter  30 value 29.422619
## iter  40 value 29.295852
## iter  50 value 29.293438
## final  value 29.293408 
## converged
## # weights:  551
## initial  value 4362.942257 
## iter  10 value 141.402010
## iter  20 value 26.642485
## iter  30 value 25.965123
## iter  40 value 25.961315
## iter  50 value 25.960095
## iter  60 value 25.960042
## final  value 25.960034 
## converged
## # weights:  771
## initial  value 17020.503121 
## iter  10 value 953.038848
## iter  20 value 41.246184
## iter  30 value 28.396451
## iter  40 value 24.292182
## iter  50 value 24.250279
## iter  60 value 24.249360
## iter  70 value 24.224923
## iter  80 value 24.223706
## iter  90 value 24.222542
## iter 100 value 24.222149
## final  value 24.222149 
## stopped after 100 iterations
## # weights:  221
## initial  value 4438.070555 
## iter  10 value 365.447216
## iter  20 value 35.362619
## iter  30 value 35.327244
## iter  40 value 32.581024
## iter  50 value 32.578935
## final  value 32.578624 
## converged
## # weights:  331
## initial  value 8782.774609 
## iter  10 value 198.270494
## iter  20 value 38.013155
## iter  30 value 32.702471
## iter  40 value 32.580038
## iter  50 value 32.572081
## iter  60 value 32.335834
## iter  70 value 31.161139
## iter  80 value 31.160740
## iter  90 value 31.160679
## final  value 31.160670 
## converged
## # weights:  551
## initial  value 16682.229323 
## iter  10 value 69.219878
## iter  20 value 33.000668
## iter  30 value 32.583488
## iter  40 value 32.530877
## iter  50 value 29.944192
## iter  60 value 29.718105
## iter  70 value 29.717408
## iter  80 value 29.717191
## iter  90 value 29.709284
## iter 100 value 29.709165
## final  value 29.709165 
## stopped after 100 iterations
## # weights:  771
## initial  value 2608.672289 
## iter  10 value 366.178080
## iter  20 value 34.528148
## iter  30 value 30.634314
## iter  40 value 30.167829
## iter  50 value 29.644330
## iter  60 value 29.291677
## iter  70 value 29.291382
## iter  80 value 29.290559
## iter  90 value 29.287989
## iter 100 value 29.277138
## final  value 29.277138 
## stopped after 100 iterations
## # weights:  221
## initial  value 6153.663041 
## iter  10 value 62.526738
## iter  20 value 36.503623
## iter  30 value 36.222367
## iter  40 value 36.221304
## final  value 36.221293 
## converged
## # weights:  331
## initial  value 11968.898619 
## final  value 4149.595688 
## converged
## # weights:  551
## initial  value 8450.432723 
## iter  10 value 591.285074
## iter  20 value 34.005089
## iter  30 value 32.599058
## iter  40 value 32.578726
## iter  50 value 32.578682
## iter  60 value 32.578626
## iter  60 value 32.578626
## final  value 32.578624 
## converged
## # weights:  771
## initial  value 2486.245048 
## final  value 843.946511 
## converged
## # weights:  221
## initial  value 8721.894553 
## iter  10 value 56.669658
## iter  20 value 39.702777
## iter  30 value 39.698177
## final  value 39.698156 
## converged
## # weights:  331
## initial  value 2660.608222 
## iter  10 value 120.655079
## iter  20 value 37.125952
## iter  30 value 36.713130
## iter  40 value 36.672620
## iter  50 value 36.666231
## iter  60 value 36.664885
## iter  70 value 36.664499
## iter  80 value 36.664274
## iter  90 value 36.664201
## final  value 36.664199 
## converged
## # weights:  551
## initial  value 6582.131503 
## iter  10 value 67.840148
## iter  20 value 56.401897
## iter  30 value 37.710357
## iter  40 value 34.399950
## iter  50 value 33.550233
## iter  60 value 33.525506
## iter  70 value 33.509170
## iter  80 value 33.508797
## iter  90 value 33.507274
## iter 100 value 33.507223
## final  value 33.507223 
## stopped after 100 iterations
## # weights:  771
## initial  value 3745.585180 
## iter  10 value 1454.933416
## iter  20 value 178.180728
## iter  30 value 40.602400
## iter  40 value 36.509414
## iter  50 value 35.151876
## iter  60 value 34.375557
## iter  70 value 33.333207
## iter  80 value 32.831726
## iter  90 value 32.664803
## iter 100 value 32.642996
## final  value 32.642996 
## stopped after 100 iterations
## # weights:  221
## initial  value 9598.117710 
## iter  10 value 336.549767
## iter  20 value 53.758139
## iter  30 value 50.343715
## iter  40 value 49.762404
## iter  50 value 49.759299
## final  value 49.759282 
## converged
# Print the fitted caret model: resampling scheme and accuracy/kappa per
# (size, decay) combination, plus the finally selected values.
Adult_TDA_PC_5.40.5_n5_NN1Fit0
## Neural Network 
## 
## 12081 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8054, 8053, 8055 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa
##   2     0.3    0.9996689  0    
##   2     0.5    0.9996689  0    
##   2     0.7    0.9996689  0    
##   3     0.3    0.9996689  0    
##   3     0.5    0.9996689  0    
##   3     0.7    0.9996689  0    
##   5     0.3    0.9996689  0    
##   5     0.5    0.9996689  0    
##   5     0.7    0.9996689  0    
##   7     0.3    0.9996689  0    
##   7     0.5    0.9996689  0    
##   7     0.7    0.9996689  0    
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold accuracy and kappa (one row per cross-validation fold).
Adult_TDA_PC_5.40.5_n5_NN1Fit0$resample
##    Accuracy Kappa Resample
## 1 0.9997516     0    Fold3
## 2 0.9995035     0    Fold2
## 3 0.9997517     0    Fold1
# Keep only the per-fold accuracy column (as a one-column data frame); it is
# subtracted from the baseline network's fold accuracies further down.
ad_tda_pc_5.40.5_n5_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n5_NN1Fit0[["resample"]]["Accuracy"]

# Show the architecture and fitted weights of the final network.
summary(Adult_TDA_PC_5.40.5_n5_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o 
## -2.46 -2.46 -2.46
# Variable-importance plot for the fitted network, limited to the 25 highest
# ranked predictors. The title previously read "Assited"; corrected here.
# NOTE(review): the summary output above reports every input-to-hidden weight
# as 0.00, so the importances plotted here are likely uninformative.
vip(Adult_TDA_PC_5.40.5_n5_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n5_NN1Fit TDA-Assisted NN")

# Score the held-out test rows with the network fitted on the training data.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed class labels to assess
# performance on the test data, then print the resulting statistics.
ad_tda_pc_5.40.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the output is
# identical to the print directly after the object was created.
ad_tda_pc_5.40.5_n5_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix (accuracy, kappa, accuracy
# confidence bounds, no-information rate and the associated p-values).
ad_tda_pc_5.40.5_n5_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Store the overall test-set accuracy (the named element "Accuracy") for the
# test-set comparison later on, then list the per-class statistics.
ad_tda_pc_5.40.5_n5_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_nn1_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.40.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (elements 5 to 7 of the per-class statistics).
ad_tda_pc_5.40.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-by-fold accuracy gap: `ad_nn1_fit_re` minus the TDA-assisted network's
# fold accuracies, so a negative entry means the TDA-assisted fit scored
# higher in that fold.
diff_tda_pca_5.40.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n5_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n5_3_fold
##     Accuracy
## 1 -0.1498108
## 2 -0.2065316
## 3 -0.1826946
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The trailing arguments (-0.01, 0.01) are presumably the lower and upper
# bounds of the region of practical equivalence (ROPE) - confirm against the
# definition of BayesianSignTest.
bst_tda_pca_5.40.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability.
# NOTE(review): probRight is 0 for this comparison, so the ratio is Inf.
bst_tda_pca_5.40.5_nn1.n5_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nn1.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n5_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same fold differences and the same (-0.01, 0.01) bounds as the sign test.
bsr_tda_pca_5.40.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9916333
## 
## $winRope
## [1] 0.008366667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation rho
# between folds; for k-fold cross-validation the usual choice is 1/k, which
# would be 1/3 for the 3-fold scheme used here - confirm.
bct_tda_pca_5.40.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n5_3_fold
## $left
## [1] 0.9938545
## 
## $rope
## [1] 0.001209545
## 
## $right
## [1] 0.004935967
# Rope Plot
# Plot the share of the fold differences falling inside (-0.01, 0.01).
plot(
  rope(diff_tda_pca_5.40.5_nn1_n5_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (currently disabled)
#bf_tda_pca_5.40.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold))
#bf_tda_pca_5.40.5_nn1.n5_3_fold

# t_test: one-sample t-test of the fold differences against a mean of zero
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold)
## t = -10.927, df = 2, p-value = 0.008271
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2504282 -0.1089298
## sample estimates:
## mean of x 
## -0.179679
### Test set diff
# Single test-set accuracy gap: `nn1_cf_ov_acc` minus the TDA-assisted
# network's overall test accuracy.
diff_tda_pca_5.40.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n5_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n5_test
## Accuracy 
##        0
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the single test-set difference with the same (-0.01, 0.01)
# bounds used for the 3-fold comparison.
bst_tda_pca_5.40.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability.
# NOTE(review): both probabilities are 0 for this comparison, so the ratio
# evaluates to NaN (0 / 0).
bst_tda_pca_5.40.5_nn1.n5_test_odds.left <-
  bst_tda_pca_5.40.5_nn1.n5_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n5_test[["probRight"]]
bst_tda_pca_5.40.5_nn1.n5_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Same single test-set difference and (-0.01, 0.01) bounds as the sign test.
bsr_tda_pca_5.40.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): with this single difference all three returned values are NA,
# presumably because one observation has no sample variance - confirm whether
# this test is meaningful for the test-set comparison.
bct_tda_pca_5.40.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n5_test))

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n5_test))
#bf_tda_pca_5.40.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n5_test))


## With TDA KDE filter 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names use "5.40.5" while this header says 50%
## overlap - confirm which overlap value was actually used.
## Node1

# Neural Network 1
# Tune a caret "nnet" model on the node-1 KDE data set, predicting the
# factor-coded `adult_df1` from all remaining columns. The resampling scheme
# comes from `fitControl`, the (size, decay) candidates from `nn1Grid`, and
# the best candidate is chosen by accuracy.
Adult_TDA_KDE_5.40.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` is forwarded to the underlying fitting function
  # through `...`; confirm that the nnet method actually uses it.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 5401.509703 
## iter  10 value 4209.392862
## iter  20 value 4090.992099
## iter  30 value 4077.036532
## iter  40 value 4073.209742
## iter  50 value 4069.354646
## iter  60 value 3977.147999
## iter  70 value 3881.524185
## iter  80 value 3536.149055
## iter  90 value 3126.210017
## iter 100 value 2848.849146
## final  value 2848.849146 
## stopped after 100 iterations
## # weights:  331
## initial  value 5016.227797 
## iter  10 value 4513.087136
## iter  20 value 4380.199548
## iter  30 value 4158.012016
## iter  40 value 3993.953463
## iter  50 value 3939.766280
## iter  60 value 3917.610239
## iter  70 value 3891.811686
## iter  80 value 3831.486984
## iter  90 value 3757.846009
## iter 100 value 3644.150341
## final  value 3644.150341 
## stopped after 100 iterations
## # weights:  551
## initial  value 5804.548891 
## iter  10 value 4122.569459
## iter  20 value 4094.553122
## iter  30 value 4042.147582
## iter  40 value 4011.991556
## iter  50 value 3977.445645
## iter  60 value 3945.324982
## iter  70 value 3927.557410
## iter  80 value 3913.647728
## iter  90 value 3861.478037
## iter 100 value 3447.754778
## final  value 3447.754778 
## stopped after 100 iterations
## # weights:  771
## initial  value 5769.132866 
## iter  10 value 4490.700237
## iter  20 value 4075.390768
## iter  30 value 3998.280276
## iter  40 value 3923.024835
## iter  50 value 3878.468352
## iter  60 value 3868.009690
## iter  70 value 3852.991813
## iter  80 value 3769.402528
## iter  90 value 3682.267279
## iter 100 value 3232.453044
## final  value 3232.453044 
## stopped after 100 iterations
## # weights:  221
## initial  value 5203.978032 
## iter  10 value 4517.486787
## iter  20 value 4106.953365
## iter  30 value 4091.062191
## iter  40 value 4067.180410
## iter  50 value 4028.915330
## iter  60 value 3977.039652
## iter  70 value 3932.904661
## iter  80 value 3906.078708
## iter  90 value 3874.231834
## iter 100 value 3861.593806
## final  value 3861.593806 
## stopped after 100 iterations
## # weights:  331
## initial  value 8326.870293 
## iter  10 value 4336.703562
## iter  20 value 4321.021575
## iter  30 value 4129.240120
## iter  40 value 3655.189053
## iter  50 value 3252.601200
## iter  60 value 3062.209188
## iter  70 value 2645.697762
## iter  80 value 2550.007625
## iter  90 value 2520.596817
## iter 100 value 2503.504374
## final  value 2503.504374 
## stopped after 100 iterations
## # weights:  551
## initial  value 4677.510831 
## iter  10 value 4518.674018
## iter  20 value 4517.400647
## iter  30 value 4517.170013
## iter  40 value 4119.236731
## iter  50 value 3773.748379
## iter  60 value 3240.754068
## iter  70 value 2926.448153
## iter  80 value 2850.603812
## iter  90 value 2795.933627
## iter 100 value 2778.351183
## final  value 2778.351183 
## stopped after 100 iterations
## # weights:  771
## initial  value 8868.164134 
## iter  10 value 4896.858158
## iter  20 value 4605.557499
## iter  30 value 4439.367406
## iter  40 value 4437.321047
## iter  50 value 4151.038923
## iter  60 value 4084.656045
## iter  70 value 4046.944653
## iter  80 value 3857.740487
## iter  90 value 3545.974378
## iter 100 value 3350.686733
## final  value 3350.686733 
## stopped after 100 iterations
## # weights:  221
## initial  value 7274.192985 
## iter  10 value 4509.643380
## iter  20 value 4116.775702
## iter  30 value 4110.411886
## iter  40 value 4110.394099
## iter  50 value 4107.539700
## iter  60 value 4106.965007
## iter  70 value 4065.329814
## iter  80 value 4050.607859
## iter  90 value 3998.422368
## iter 100 value 3935.405803
## final  value 3935.405803 
## stopped after 100 iterations
## # weights:  331
## initial  value 5303.818911 
## iter  10 value 4135.172962
## iter  20 value 4068.188345
## iter  30 value 3992.490944
## iter  40 value 3906.330354
## iter  50 value 3727.262392
## iter  60 value 3661.437450
## iter  70 value 3557.395824
## iter  80 value 3197.719071
## iter  90 value 2710.053984
## iter 100 value 2579.248158
## final  value 2579.248158 
## stopped after 100 iterations
## # weights:  551
## initial  value 6193.099263 
## iter  10 value 4418.024486
## iter  20 value 4283.877598
## iter  30 value 4116.804516
## iter  40 value 4075.882975
## iter  50 value 4019.582799
## iter  60 value 3968.382834
## iter  70 value 3934.947750
## iter  80 value 3663.519631
## iter  90 value 3238.816235
## iter 100 value 3064.204919
## final  value 3064.204919 
## stopped after 100 iterations
## # weights:  771
## initial  value 8376.891975 
## iter  10 value 4229.431615
## iter  20 value 4069.391169
## iter  30 value 4042.466997
## iter  40 value 3995.127488
## iter  50 value 3977.379569
## iter  60 value 3965.117691
## iter  70 value 3960.132152
## iter  80 value 3959.223714
## iter  90 value 3949.180524
## iter 100 value 3666.190963
## final  value 3666.190963 
## stopped after 100 iterations
## # weights:  221
## initial  value 5244.440945 
## iter  10 value 4518.875879
## final  value 4518.870312 
## converged
## # weights:  331
## initial  value 6141.747555 
## iter  10 value 4167.961980
## iter  20 value 3815.233819
## iter  30 value 3538.249240
## iter  40 value 3245.493534
## iter  50 value 2779.537394
## iter  60 value 2585.838338
## iter  70 value 2525.498236
## iter  80 value 2484.360534
## iter  90 value 2475.593458
## iter 100 value 2441.458336
## final  value 2441.458336 
## stopped after 100 iterations
## # weights:  551
## initial  value 8422.057661 
## iter  10 value 4226.908024
## iter  20 value 4161.560765
## iter  30 value 4159.156196
## iter  40 value 4129.822320
## iter  50 value 4074.268662
## iter  60 value 4009.643216
## iter  70 value 3943.812714
## iter  80 value 3902.448756
## iter  90 value 3887.379055
## iter 100 value 3865.665284
## final  value 3865.665284 
## stopped after 100 iterations
## # weights:  771
## initial  value 4495.530617 
## iter  10 value 4173.565430
## iter  20 value 4084.331433
## iter  30 value 4077.540106
## iter  40 value 4068.000941
## iter  50 value 4064.269095
## iter  60 value 4056.861362
## iter  70 value 4053.977541
## iter  80 value 4002.585433
## iter  90 value 3978.762877
## iter 100 value 3945.756915
## final  value 3945.756915 
## stopped after 100 iterations
## # weights:  221
## initial  value 5816.644453 
## iter  10 value 4503.739629
## iter  20 value 4023.325542
## iter  30 value 4000.745438
## iter  40 value 3994.117802
## iter  50 value 3990.303492
## iter  60 value 3985.861515
## iter  70 value 3984.334916
## iter  80 value 3952.744490
## iter  90 value 3937.635913
## iter 100 value 3927.074353
## final  value 3927.074353 
## stopped after 100 iterations
## # weights:  331
## initial  value 4549.744478 
## iter  10 value 4314.793430
## iter  20 value 4136.258122
## iter  30 value 4111.224015
## iter  40 value 4034.908447
## iter  50 value 4027.966548
## iter  60 value 4025.686465
## iter  70 value 4011.671746
## iter  80 value 3607.720873
## iter  90 value 2979.445343
## iter 100 value 2747.472522
## final  value 2747.472522 
## stopped after 100 iterations
## # weights:  551
## initial  value 9414.461285 
## iter  10 value 4351.732208
## iter  20 value 4132.491308
## iter  30 value 4120.253340
## iter  40 value 4093.429411
## iter  50 value 4014.754993
## iter  60 value 3918.278010
## iter  70 value 3890.538778
## iter  80 value 3835.542400
## iter  90 value 3517.142862
## iter 100 value 3358.098884
## final  value 3358.098884 
## stopped after 100 iterations
## # weights:  771
## initial  value 4572.998836 
## iter  10 value 4169.298999
## iter  20 value 4124.578757
## iter  30 value 4055.451266
## iter  40 value 4051.798168
## iter  50 value 4022.222117
## iter  60 value 4019.853110
## iter  70 value 4016.166104
## iter  80 value 3975.328860
## iter  90 value 3731.953383
## iter 100 value 3312.628378
## final  value 3312.628378 
## stopped after 100 iterations
## # weights:  221
## initial  value 5463.667175 
## iter  10 value 4482.130271
## iter  20 value 4478.009045
## iter  30 value 4171.356601
## iter  40 value 4092.351745
## iter  50 value 4085.786951
## iter  60 value 4032.714830
## iter  70 value 4023.292297
## iter  80 value 4020.864125
## iter  90 value 4019.450823
## iter 100 value 3986.495455
## final  value 3986.495455 
## stopped after 100 iterations
## # weights:  331
## initial  value 5958.066323 
## iter  10 value 4314.463912
## iter  20 value 4300.233222
## iter  30 value 4135.921719
## iter  40 value 4134.939098
## iter  50 value 4114.454385
## iter  60 value 4035.420266
## iter  70 value 4027.899530
## iter  80 value 3992.677234
## iter  90 value 3880.790264
## iter 100 value 3714.367578
## final  value 3714.367578 
## stopped after 100 iterations
## # weights:  551
## initial  value 5742.613447 
## iter  10 value 4180.759619
## iter  20 value 4072.336529
## iter  30 value 4044.312988
## iter  40 value 4032.208779
## iter  50 value 3968.450301
## iter  60 value 3901.196595
## iter  70 value 3886.989076
## iter  80 value 3879.986110
## iter  90 value 3875.250159
## iter 100 value 3827.460139
## final  value 3827.460139 
## stopped after 100 iterations
## # weights:  771
## initial  value 5763.822824 
## iter  10 value 4346.960326
## iter  20 value 4156.607902
## iter  30 value 4154.086398
## iter  40 value 4152.654732
## iter  50 value 4059.838757
## iter  60 value 4013.413449
## iter  70 value 3890.857407
## iter  80 value 3741.924095
## iter  90 value 3728.315390
## iter 100 value 3714.334538
## final  value 3714.334538 
## stopped after 100 iterations
## # weights:  221
## initial  value 6464.936578 
## iter  10 value 4517.282491
## iter  20 value 4454.136226
## iter  30 value 4073.798218
## iter  40 value 4033.441494
## iter  50 value 4022.239457
## iter  60 value 4002.800718
## iter  70 value 3989.651052
## iter  80 value 3973.299930
## iter  90 value 3932.496093
## iter 100 value 3914.157623
## final  value 3914.157623 
## stopped after 100 iterations
## # weights:  331
## initial  value 5680.108040 
## iter  10 value 4515.320944
## iter  20 value 4087.363049
## iter  30 value 4064.745156
## iter  40 value 3996.792058
## iter  50 value 3960.943587
## iter  60 value 3956.733008
## iter  70 value 3942.711818
## iter  80 value 3889.150158
## iter  90 value 3848.882847
## iter 100 value 3821.836769
## final  value 3821.836769 
## stopped after 100 iterations
## # weights:  551
## initial  value 7697.853993 
## iter  10 value 4513.709758
## iter  20 value 4018.939634
## iter  30 value 4012.503320
## iter  40 value 4005.985192
## iter  50 value 4002.948530
## iter  60 value 3987.220372
## iter  70 value 3945.934511
## iter  80 value 3911.791384
## iter  90 value 3900.560115
## iter 100 value 3881.402327
## final  value 3881.402327 
## stopped after 100 iterations
## # weights:  771
## initial  value 7546.066566 
## iter  10 value 4408.243612
## iter  20 value 4131.454477
## iter  30 value 3996.321035
## iter  40 value 3991.243503
## iter  50 value 3979.399818
## iter  60 value 3972.569371
## iter  70 value 3944.479115
## iter  80 value 3903.803959
## iter  90 value 3864.480272
## iter 100 value 3791.126695
## final  value 3791.126695 
## stopped after 100 iterations
## # weights:  221
## initial  value 5186.363507 
## iter  10 value 4421.178274
## iter  20 value 4148.231385
## iter  30 value 4045.672921
## iter  40 value 4026.417823
## iter  50 value 4025.380508
## iter  60 value 3989.500768
## iter  70 value 3942.498639
## iter  80 value 3922.585845
## iter  90 value 3920.145002
## iter 100 value 3885.028438
## final  value 3885.028438 
## stopped after 100 iterations
## # weights:  331
## initial  value 6445.654016 
## iter  10 value 4449.760552
## iter  20 value 4383.141813
## iter  30 value 4253.934287
## iter  40 value 4022.807511
## iter  50 value 3946.424641
## iter  60 value 3698.810012
## iter  70 value 3062.428450
## iter  80 value 2898.385282
## iter  90 value 2643.913331
## iter 100 value 2565.033369
## final  value 2565.033369 
## stopped after 100 iterations
## # weights:  551
## initial  value 4680.174234 
## iter  10 value 4349.262932
## iter  20 value 4344.009698
## iter  30 value 4326.870286
## iter  40 value 4278.156281
## iter  50 value 3909.775433
## iter  60 value 3120.346884
## iter  70 value 2773.056463
## iter  80 value 2754.652805
## iter  90 value 2629.734806
## iter 100 value 2554.226358
## final  value 2554.226358 
## stopped after 100 iterations
## # weights:  771
## initial  value 4786.518209 
## iter  10 value 4161.597472
## iter  20 value 4146.045370
## iter  30 value 4139.933233
## iter  40 value 4128.360031
## iter  50 value 3990.735465
## iter  60 value 3715.458467
## iter  70 value 3152.117382
## iter  80 value 2898.937904
## iter  90 value 2859.129566
## iter 100 value 2686.404981
## final  value 2686.404981 
## stopped after 100 iterations
## # weights:  221
## initial  value 6947.454858 
## iter  10 value 4442.076575
## iter  20 value 4121.357880
## iter  30 value 4121.168373
## iter  40 value 4120.425164
## iter  50 value 4120.294468
## iter  60 value 4120.162678
## iter  70 value 4054.929910
## iter  80 value 3988.157623
## iter  90 value 3973.416222
## iter 100 value 3960.688778
## final  value 3960.688778 
## stopped after 100 iterations
## # weights:  331
## initial  value 5094.584864 
## iter  10 value 4518.788367
## iter  20 value 4517.650071
## iter  30 value 4489.008150
## iter  40 value 4013.618489
## iter  50 value 3962.658918
## iter  60 value 3942.477286
## iter  70 value 3923.867710
## iter  80 value 3904.019966
## iter  90 value 3822.296711
## iter 100 value 3604.228585
## final  value 3604.228585 
## stopped after 100 iterations
## # weights:  551
## initial  value 4599.510601 
## iter  10 value 4356.636166
## iter  20 value 4132.305900
## iter  30 value 4077.929608
## iter  40 value 4055.091179
## iter  50 value 4048.578910
## iter  60 value 4028.326082
## iter  70 value 4019.504154
## iter  80 value 4007.310702
## iter  90 value 3994.476132
## iter 100 value 3827.574757
## final  value 3827.574757 
## stopped after 100 iterations
## # weights:  771
## initial  value 12055.816494 
## iter  10 value 4492.939310
## iter  20 value 4342.136817
## iter  30 value 4039.468631
## iter  40 value 4015.641937
## iter  50 value 4011.436947
## iter  60 value 3993.269769
## iter  70 value 3975.575610
## iter  80 value 3719.186423
## iter  90 value 3418.065818
## iter 100 value 2924.279148
## final  value 2924.279148 
## stopped after 100 iterations
## # weights:  331
## initial  value 6776.448754 
## iter  10 value 6204.101783
## iter  20 value 6193.309695
## iter  30 value 6177.733397
## iter  40 value 6107.348436
## iter  50 value 6013.804645
## iter  60 value 6010.491073
## iter  70 value 5983.843106
## iter  80 value 5946.797970
## iter  90 value 4867.325041
## iter 100 value 4728.678549
## final  value 4728.678549 
## stopped after 100 iterations
Adult_TDA_KDE_5.40.5_n1_NN1Fit0
## Neural Network 
## 
## 11838 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7891, 7893, 7892 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7869496  0.3071721
##   2     0.5    0.7976855  0.3489376
##   2     0.7    0.7960802  0.3277211
##   3     0.3    0.8263250  0.4828081
##   3     0.5    0.8563101  0.6216816
##   3     0.7    0.8220934  0.4522682
##   5     0.3    0.8057946  0.3964069
##   5     0.5    0.8367090  0.5805923
##   5     0.7    0.7959126  0.3193015
##   7     0.3    0.8007242  0.3742706
##   7     0.5    0.8213359  0.4926218
##   7     0.7    0.8041060  0.3823065
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.5.
Adult_TDA_KDE_5.40.5_n1_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8573239 0.6233843    Fold3
## 2 0.8555133 0.6195209    Fold2
## 3 0.8560932 0.6221395    Fold1
# Keep the per-fold accuracy column of the node-1 fit as a one-column data
# frame. Rows stay in the order caret reports them (Fold3, Fold2, Fold1 in the
# printout above), so any later row-wise arithmetic follows that order.
ad_tda_kde_5.40.5_n1_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n1_NN1Fit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.40.5_n1_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00    -0.03     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00    -0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00    -0.06     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.02     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.01     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     3.94    -0.18     0.07     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     2.96    -0.01     0.33    -0.50     0.83     0.00     0.35    -0.22 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.66     0.62     0.88     0.00     0.28     0.46     0.37     0.44 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##    -0.03     0.39     0.37     0.68     0.37     0.18    -1.04     0.25 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##    -0.21     1.15    -0.72    -0.01    -0.17     0.52    -0.05    -1.06 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     2.15     0.95     0.30     0.16     0.33     0.32     0.12    -0.23 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -0.67     0.40     1.15     1.02     0.98     0.84    -0.31     0.30 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##    -0.67    -0.85     0.22     0.49     0.53     1.00     1.01     0.58 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##    -0.65     1.16     0.19     0.71     0.05     0.85     1.74     1.22 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.68    -0.03    -0.12    -0.71    -0.86     0.20     0.66 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.59     0.79     0.38     0.49     0.21    -0.23    -1.13     0.81 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.82    -0.44     0.00    -0.07    -0.62    -0.27     0.64     0.72 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##    -0.09    -1.71     1.05    -0.38    -0.13     0.09     1.57     0.56 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.28     0.24    -0.52     0.42    -0.48    -0.60     2.51    -1.35 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.30     0.49    -0.86     0.15    -0.43 
##  b->o h1->o h2->o h3->o 
##  1.62  1.15  0.95 -5.08
# Variable-importance plot for the node-1 network, limited to 50 features.
# The feature count is passed by name, and the plot title spelling is
# corrected ("Assisted").
vip(Adult_TDA_KDE_5.40.5_n1_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n1_NN1Fit TDA-Assisted NN")

# Score the test data with the node-1 network fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print
# the resulting performance statistics.
ad_tda_kde_5.40.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6801  1046
##      >50K     615  1306
##                                           
##                Accuracy : 0.83            
##                  95% CI : (0.8224, 0.8374)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5039          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9171          
##             Specificity : 0.5553          
##          Pos Pred Value : 0.8667          
##          Neg Pred Value : 0.6799          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6963          
##    Detection Prevalence : 0.8033          
##       Balanced Accuracy : 0.7362          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n1_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6801  1046
##      >50K     615  1306
##                                           
##                Accuracy : 0.83            
##                  95% CI : (0.8224, 0.8374)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5039          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9171          
##             Specificity : 0.5553          
##          Pos Pred Value : 0.8667          
##          Neg Pred Value : 0.6799          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6963          
##    Detection Prevalence : 0.8033          
##       Balanced Accuracy : 0.7362          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.299550e-01   5.038680e-01   8.223557e-01   8.373569e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   5.507872e-65   5.038922e-26
# Keep only the overall test-set accuracy (the first entry of $overall in the
# printout above), selected by name rather than by position.
ad_tda_kde_5.40.5_n1_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_nn1_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9170712            0.5552721            0.8667006 
##       Neg Pred Value            Precision               Recall 
##            0.6798542            0.8667006            0.9170712 
##                   F1           Prevalence       Detection Rate 
##            0.8911747            0.7592138            0.6962531 
## Detection Prevalence    Balanced Accuracy 
##            0.8033374            0.7361717
# Keep precision, recall and F1 (entries 5 to 7 of $byClass in the printout
# above), selected by name rather than by position.
ad_tda_kde_5.40.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy gap: ad_nn1_fit_re minus the TDA-assisted node-1 fit.
# A negative entry means the TDA-assisted fit scored higher on that row.
# Rows are matched by position, not by fold label.
diff_tda_kde_5.40.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n1_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n1_3_fold
##       Accuracy
## 1 -0.007383106
## 2 -0.062541473
## 3 -0.039036115
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Run on the fold-wise accuracy differences, with -0.01 and 0.01 as the two
# trailing arguments (presumably the ROPE bounds, since the result reports a
# probRope component -- confirm against the BayesianSignTest definition).

bst_tda_kde_5.40.5_nn1.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nn1.n1_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test. probRight is 0 in the
# result printed above, so this division evaluates to Inf.

bst_tda_kde_5.40.5_nn1.n1_3_fold_odds.left<-bst_tda_kde_5.40.5_nn1.n1_3_fold$probLeft/bst_tda_kde_5.40.5_nn1.n1_3_fold$probRight
bst_tda_kde_5.40.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Run on the same fold-wise differences with the same -0.01 / 0.01 trailing
# arguments as the sign test; the result is printed right after.

bsr_tda_kde_5.40.5_nn1.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nn1.n1_3_fold
## $winLeft
## [1] 0.8559333
## 
## $winRope
## [1] 0.1440667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Run on the fold-wise differences with 0.1, -0.01 and 0.01 as the trailing
# arguments.
# NOTE(review): 0.1 is presumably the assumed correlation between resamples;
# the fits compared here use 3-fold cross-validation, so confirm that 0.1 is
# the intended value against the correlatedBayesianTtest definition.

bct_tda_kde_5.40.5_nn1.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nn1.n1_3_fold
## $left
## [1] 0.8550607
## 
## $rope
## [1] 0.08056161
## 
## $right
## [1] 0.06437773
# ROPE plot of the fold-wise differences for the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_kde_5.40.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold))
# bf_tda_kde_5.40.5_nn1.n1_3_fold

# One-sample t-test of the fold-wise differences
t.test(x = as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold)
## t = -2.2728, df = 2, p-value = 0.151
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.10507941  0.03243895
## sample estimates:
##   mean of x 
## -0.03632023
### Test set diff
# Test-set accuracy gap: nn1_cf_ov_acc minus the TDA-assisted node-1 accuracy.
# A negative value means the TDA-assisted fit scored higher on the test set.
diff_tda_kde_5.40.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n1_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n1_test
##   Accuracy 
## -0.0707412
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Same call as for the 3-fold differences, but the input here is the single
# test-set accuracy difference computed above.

bst_tda_kde_5.40.5_nn1.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nn1.n1_test),-0.01,0.01)
bst_tda_kde_5.40.5_nn1.n1_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight for the test-set sign test. probRight is 0
# in the result printed above, so this division evaluates to Inf.

bst_tda_kde_5.40.5_nn1.n1_test_odds.left<-bst_tda_kde_5.40.5_nn1.n1_test$probLeft/bst_tda_kde_5.40.5_nn1.n1_test$probRight
bst_tda_kde_5.40.5_nn1.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Run on the single test-set accuracy difference with the same -0.01 / 0.01
# trailing arguments; the result is printed right after.

bsr_tda_kde_5.40.5_nn1.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nn1.n1_test),-0.01,0.01)
bsr_tda_kde_5.40.5_nn1.n1_test
## $winLeft
## [1] 0.8425667
## 
## $winRope
## [1] 0.1574333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# The input is the single test-set accuracy difference; as the printout that
# follows shows, left, rope and right all come back as NA for this input.

bct_tda_kde_5.40.5_nn1.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nn1.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n1_test))

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n1_test))
#bf_tda_kde_5.40.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n1_test))

##Node2

#Neural Network 1
# Fit the neural network for this node with caret: the outcome adult_df1 is
# modelled on all other columns, tuned over nn1Grid under fitControl, and the
# best model is chosen by accuracy.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): the section heading and the result name say node 2 ("n2"),
# but the training data object is named "...n3.vec" -- confirm that this is
# the intended node vector.
Adult_TDA_KDE_5.40.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4097.830708 
## iter  10 value 3948.916991
## iter  20 value 3911.070706
## iter  30 value 3837.520903
## iter  40 value 3651.529123
## iter  50 value 3493.270197
## iter  60 value 3020.884325
## iter  70 value 2750.306695
## iter  80 value 2551.028000
## iter  90 value 2427.162803
## iter 100 value 2389.082745
## final  value 2389.082745 
## stopped after 100 iterations
## # weights:  331
## initial  value 4135.488880 
## iter  10 value 4077.492751
## iter  20 value 3831.965470
## iter  30 value 3802.253140
## iter  40 value 3777.017727
## iter  50 value 3752.019065
## iter  60 value 3721.047382
## iter  70 value 3653.039398
## iter  80 value 3379.751399
## iter  90 value 3079.506368
## iter 100 value 3020.946373
## final  value 3020.946373 
## stopped after 100 iterations
## # weights:  551
## initial  value 5779.892868 
## iter  10 value 4093.294607
## iter  20 value 3843.931714
## iter  30 value 3840.658862
## iter  40 value 3829.811906
## iter  50 value 3827.741759
## iter  60 value 3825.918507
## iter  70 value 3820.496191
## iter  80 value 3815.937913
## iter  90 value 3813.846782
## iter 100 value 3744.969171
## final  value 3744.969171 
## stopped after 100 iterations
## # weights:  771
## initial  value 8655.760644 
## iter  10 value 3958.591297
## iter  20 value 3874.435852
## iter  30 value 3827.194502
## iter  40 value 3751.197180
## iter  50 value 3740.888503
## iter  60 value 3688.263634
## iter  70 value 3512.522351
## iter  80 value 3252.888673
## iter  90 value 2833.683879
## iter 100 value 2669.724424
## final  value 2669.724424 
## stopped after 100 iterations
## # weights:  221
## initial  value 5008.990779 
## iter  10 value 4127.279180
## iter  20 value 3839.804898
## iter  30 value 3826.307166
## iter  40 value 3790.535146
## iter  50 value 3756.678074
## iter  60 value 3721.744364
## iter  70 value 3703.875977
## iter  80 value 3685.819457
## iter  90 value 3656.545183
## iter 100 value 3626.824690
## final  value 3626.824690 
## stopped after 100 iterations
## # weights:  331
## initial  value 4307.654355 
## iter  10 value 3860.097053
## iter  20 value 3766.828282
## iter  30 value 3743.510094
## iter  40 value 3716.063012
## iter  50 value 3646.683052
## iter  60 value 3318.734314
## iter  70 value 3018.990811
## iter  80 value 2996.082525
## iter  90 value 2973.224457
## iter 100 value 2893.815044
## final  value 2893.815044 
## stopped after 100 iterations
## # weights:  551
## initial  value 6623.848535 
## iter  10 value 4136.963406
## iter  20 value 4124.314199
## iter  30 value 3842.427752
## iter  40 value 3833.440330
## iter  50 value 3821.285457
## iter  60 value 3719.074120
## iter  70 value 3685.634806
## iter  80 value 3670.525694
## iter  90 value 3384.486137
## iter 100 value 3238.673927
## final  value 3238.673927 
## stopped after 100 iterations
## # weights:  771
## initial  value 4866.833137 
## iter  10 value 3883.723609
## iter  20 value 3802.900266
## iter  30 value 3787.906163
## iter  40 value 3747.340044
## iter  50 value 3662.515736
## iter  60 value 3499.218934
## iter  70 value 3289.115831
## iter  80 value 3065.258115
## iter  90 value 2732.421160
## iter 100 value 2644.155857
## final  value 2644.155857 
## stopped after 100 iterations
## # weights:  221
## initial  value 4161.539722 
## iter  10 value 4127.937946
## iter  20 value 4127.387514
## iter  30 value 4127.381168
## iter  30 value 4127.381142
## iter  30 value 4127.381141
## final  value 4127.381141 
## converged
## # weights:  331
## initial  value 5221.673854 
## iter  10 value 4095.334447
## iter  20 value 3920.640384
## iter  30 value 3818.721113
## iter  40 value 3758.945479
## iter  50 value 3744.650909
## iter  60 value 3721.408241
## iter  70 value 3708.425390
## iter  80 value 3686.665094
## iter  90 value 3660.129716
## iter 100 value 3622.332371
## final  value 3622.332371 
## stopped after 100 iterations
## # weights:  551
## initial  value 4798.417284 
## iter  10 value 4127.364356
## iter  20 value 4107.535772
## iter  30 value 3798.852041
## iter  40 value 3755.340710
## iter  50 value 3747.436866
## iter  60 value 3737.124818
## iter  70 value 3724.678954
## iter  80 value 3695.657003
## iter  90 value 3671.668726
## iter 100 value 3661.548834
## final  value 3661.548834 
## stopped after 100 iterations
## # weights:  771
## initial  value 4471.146024 
## iter  10 value 4087.123118
## iter  20 value 4016.740038
## iter  30 value 3871.463900
## iter  40 value 3810.506802
## iter  50 value 3481.725769
## iter  60 value 2897.050691
## iter  70 value 2648.348134
## iter  80 value 2523.414511
## iter  90 value 2475.772362
## iter 100 value 2458.607925
## final  value 2458.607925 
## stopped after 100 iterations
## # weights:  221
## initial  value 4729.978143 
## iter  10 value 4127.173506
## iter  20 value 4127.105448
## iter  30 value 3831.432406
## iter  40 value 3782.757251
## iter  50 value 3758.499207
## iter  60 value 3745.524654
## iter  70 value 3735.965902
## iter  80 value 3704.799841
## iter  90 value 3690.080877
## iter 100 value 3681.467573
## final  value 3681.467573 
## stopped after 100 iterations
## # weights:  331
## initial  value 4146.413469 
## iter  10 value 4057.262271
## iter  20 value 4015.888510
## iter  30 value 3817.918604
## iter  40 value 3810.693499
## iter  50 value 3796.575072
## iter  60 value 3738.250103
## iter  70 value 3527.979528
## iter  80 value 3265.339854
## iter  90 value 3228.620395
## iter 100 value 2864.817106
## final  value 2864.817106 
## stopped after 100 iterations
## # weights:  551
## initial  value 4170.888061 
## iter  10 value 4081.638239
## iter  20 value 3880.354890
## iter  30 value 3859.617503
## iter  40 value 3791.333100
## iter  50 value 3774.208455
## iter  60 value 3771.264026
## iter  70 value 3769.252482
## iter  80 value 3767.834930
## iter  90 value 3712.270035
## iter 100 value 3690.535499
## final  value 3690.535499 
## stopped after 100 iterations
## # weights:  771
## initial  value 6478.485390 
## iter  10 value 3981.340729
## iter  20 value 3816.017624
## iter  30 value 3799.387855
## iter  40 value 3789.365738
## iter  50 value 3770.555324
## iter  60 value 3648.973532
## iter  70 value 3250.831510
## iter  80 value 3129.325279
## iter  90 value 3027.819990
## iter 100 value 2792.980296
## final  value 2792.980296 
## stopped after 100 iterations
## # weights:  221
## initial  value 4147.016568 
## iter  10 value 4127.788633
## iter  20 value 4127.302652
## final  value 4127.297049 
## converged
## # weights:  331
## initial  value 5629.769012 
## iter  10 value 4142.134573
## iter  20 value 3968.430398
## iter  30 value 3893.574272
## iter  40 value 3882.649946
## iter  50 value 3881.362667
## iter  60 value 3879.943896
## iter  70 value 3877.796927
## iter  80 value 3877.714313
## final  value 3877.712890 
## converged
## # weights:  551
## initial  value 4623.268172 
## iter  10 value 4110.162848
## iter  20 value 3934.016543
## iter  30 value 3870.809322
## iter  40 value 3857.632345
## iter  50 value 3833.260124
## iter  60 value 3798.813442
## iter  70 value 3766.936901
## iter  80 value 3745.270527
## iter  90 value 3729.815287
## iter 100 value 3715.007129
## final  value 3715.007129 
## stopped after 100 iterations
## # weights:  771
## initial  value 4234.835165 
## iter  10 value 4092.150126
## iter  20 value 3888.059747
## iter  30 value 3879.113975
## iter  40 value 3877.542481
## iter  50 value 3867.143364
## iter  60 value 3858.880436
## iter  70 value 3812.884812
## iter  80 value 3772.465379
## iter  90 value 3767.074739
## iter 100 value 3764.897226
## final  value 3764.897226 
## stopped after 100 iterations
## # weights:  221
## initial  value 4802.384345 
## iter  10 value 4063.370971
## iter  20 value 3990.010054
## iter  30 value 3989.251747
## iter  40 value 3898.737474
## iter  50 value 3800.583245
## iter  60 value 3785.329074
## iter  70 value 3776.769515
## iter  80 value 3776.596903
## iter  80 value 3776.596866
## iter  80 value 3776.596861
## final  value 3776.596861 
## converged
## # weights:  331
## initial  value 6049.068771 
## iter  10 value 4127.942948
## iter  20 value 4009.893877
## iter  30 value 3823.766722
## iter  40 value 3782.772137
## iter  50 value 3731.227438
## iter  60 value 3052.165745
## iter  70 value 2916.953868
## iter  80 value 2815.470498
## iter  90 value 2620.222775
## iter 100 value 2514.690329
## final  value 2514.690329 
## stopped after 100 iterations
## # weights:  551
## initial  value 7931.965953 
## iter  10 value 4043.732767
## iter  20 value 3816.223704
## iter  30 value 3807.068558
## iter  40 value 3793.634842
## iter  50 value 3786.069180
## iter  60 value 3781.796263
## iter  70 value 3772.409089
## iter  80 value 3759.080109
## iter  90 value 3717.552937
## iter 100 value 3690.224248
## final  value 3690.224248 
## stopped after 100 iterations
## # weights:  771
## initial  value 4554.181499 
## iter  10 value 4099.177507
## iter  20 value 4084.949321
## iter  30 value 3907.654399
## iter  40 value 3894.191615
## iter  50 value 3864.876048
## iter  60 value 3855.984864
## iter  70 value 3764.050679
## iter  80 value 3726.755665
## iter  90 value 3719.175046
## iter 100 value 3674.523924
## final  value 3674.523924 
## stopped after 100 iterations
## # weights:  221
## initial  value 4491.224364 
## iter  10 value 4097.861333
## iter  20 value 3755.654244
## iter  30 value 3750.306834
## iter  40 value 3744.652686
## iter  50 value 3711.212061
## iter  60 value 3575.099234
## iter  70 value 3121.971275
## iter  80 value 2872.104248
## iter  90 value 2787.910622
## iter 100 value 2764.287025
## final  value 2764.287025 
## stopped after 100 iterations
## # weights:  331
## initial  value 7328.927693 
## iter  10 value 4126.441466
## iter  20 value 3902.586962
## iter  30 value 3837.907013
## iter  40 value 3837.850674
## iter  50 value 3823.539237
## iter  60 value 3797.585765
## iter  70 value 3757.475874
## iter  80 value 3500.985892
## iter  90 value 3089.967250
## iter 100 value 2909.383887
## final  value 2909.383887 
## stopped after 100 iterations
## # weights:  551
## initial  value 6156.949871 
## iter  10 value 3966.138126
## iter  20 value 3907.377039
## iter  30 value 3765.908590
## iter  40 value 3707.496077
## iter  50 value 3648.096139
## iter  60 value 3615.076122
## iter  70 value 3581.723169
## iter  80 value 3238.768713
## iter  90 value 3072.668300
## iter 100 value 3033.286533
## final  value 3033.286533 
## stopped after 100 iterations
## # weights:  771
## initial  value 4949.154681 
## iter  10 value 3854.836778
## iter  20 value 3851.335917
## iter  30 value 3764.776338
## iter  40 value 3737.065252
## iter  50 value 3710.404421
## iter  60 value 3625.612965
## iter  70 value 3280.231298
## iter  80 value 2749.454021
## iter  90 value 2722.957190
## iter 100 value 2699.799230
## final  value 2699.799230 
## stopped after 100 iterations
## # weights:  221
## initial  value 4295.359579 
## iter  10 value 3925.069773
## iter  20 value 3825.114273
## iter  30 value 3797.539089
## iter  40 value 3732.515858
## iter  50 value 3609.111721
## iter  60 value 3420.992481
## iter  70 value 3049.563439
## iter  80 value 2725.689952
## iter  90 value 2650.596772
## iter 100 value 2607.810189
## final  value 2607.810189 
## stopped after 100 iterations
## # weights:  331
## initial  value 5950.405546 
## iter  10 value 4126.586661
## iter  20 value 4094.422619
## iter  30 value 3840.773642
## iter  40 value 3832.742687
## iter  50 value 3832.002081
## iter  60 value 3831.420317
## iter  70 value 3830.800340
## iter  80 value 3830.436191
## iter  90 value 3830.422366
## final  value 3830.420321 
## converged
## # weights:  551
## initial  value 4836.904855 
## iter  10 value 4111.481400
## iter  20 value 4070.875533
## iter  30 value 3850.849468
## iter  40 value 3832.858802
## iter  50 value 3826.881805
## iter  60 value 3815.986794
## iter  70 value 3793.570464
## iter  80 value 3745.296817
## iter  90 value 3652.796276
## iter 100 value 3627.201098
## final  value 3627.201098 
## stopped after 100 iterations
## # weights:  771
## initial  value 4379.124380 
## iter  10 value 4099.114087
## iter  20 value 3856.860997
## iter  30 value 3828.296311
## iter  40 value 3788.489318
## iter  50 value 3716.887428
## iter  60 value 3711.094545
## iter  70 value 3691.975024
## iter  80 value 3677.055422
## iter  90 value 3676.112732
## iter 100 value 3665.539608
## final  value 3665.539608 
## stopped after 100 iterations
## # weights:  221
## initial  value 5557.546348 
## iter  10 value 4102.208036
## iter  20 value 4093.977174
## iter  30 value 3835.871339
## iter  40 value 3788.198475
## iter  50 value 3732.013184
## iter  60 value 3681.676960
## iter  70 value 3658.840740
## iter  80 value 3652.611771
## iter  90 value 3627.436038
## iter 100 value 3560.783294
## final  value 3560.783294 
## stopped after 100 iterations
## # weights:  331
## initial  value 4262.197874 
## iter  10 value 4059.300752
## iter  20 value 3850.244005
## iter  30 value 3814.809816
## iter  40 value 3785.051632
## iter  50 value 3761.311384
## iter  60 value 3757.664670
## iter  70 value 3738.491713
## iter  80 value 3720.472360
## iter  90 value 3703.915130
## iter 100 value 3669.679958
## final  value 3669.679958 
## stopped after 100 iterations
## # weights:  551
## initial  value 4438.611376 
## iter  10 value 4101.508492
## iter  20 value 3894.257648
## iter  30 value 3849.477201
## iter  40 value 3838.129742
## iter  50 value 3769.867926
## iter  60 value 3743.777309
## iter  70 value 3726.246964
## iter  80 value 3494.449504
## iter  90 value 3243.380584
## iter 100 value 2676.119908
## final  value 2676.119908 
## stopped after 100 iterations
## # weights:  771
## initial  value 4359.057615 
## iter  10 value 3873.233825
## iter  20 value 3831.957303
## iter  30 value 3830.922685
## iter  40 value 3821.111122
## iter  50 value 3793.533805
## iter  60 value 3557.486726
## iter  70 value 3342.609575
## iter  80 value 2996.732542
## iter  90 value 2905.759064
## iter 100 value 2744.220898
## final  value 2744.220898 
## stopped after 100 iterations
## # weights:  771
## initial  value 7445.709362 
## iter  10 value 6190.527801
## final  value 6190.524988 
## converged
Adult_TDA_KDE_5.40.5_n2_NN1Fit0
## Neural Network 
## 
## 10351 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6901, 6901, 6900 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8046560  0.4825058
##   2     0.5    0.7658142  0.2882833
##   2     0.7    0.7475593  0.1818195
##   3     0.3    0.7803086  0.3405386
##   3     0.5    0.7750970  0.3246273
##   3     0.7    0.7856273  0.3713239
##   5     0.3    0.7634046  0.3048563
##   5     0.5    0.7561595  0.2305120
##   5     0.7    0.7833994  0.3738575
##   7     0.3    0.8073625  0.5451341
##   7     0.5    0.7883319  0.3828229
##   7     0.7    0.8042700  0.4681658
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
Adult_TDA_KDE_5.40.5_n2_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.7971014 0.5384148    Fold2
## 2 0.8272464 0.5692171    Fold1
## 3 0.7977398 0.5277704    Fold3
# Keep the per-fold accuracy column of the node-2 fit as a one-column data
# frame. Rows stay in the order caret reports them (Fold2, Fold1, Fold3 in the
# printout above), so any later row-wise arithmetic follows that order.
ad_tda_kde_5.40.5_n2_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n2_NN1Fit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.40.5_n2_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
## -0.23 -0.23  0.00  0.00  0.00 -0.23 -0.23  0.00
# Variable-importance plot for the node-2 network, limited to 50 features
# (title typo "Assited" corrected to "Assisted")
vip(Adult_TDA_KDE_5.40.5_n2_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n2_NN1Fit TDA-Assisted NN")

# Score the test data with the node-2 network
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed labels and print the
# resulting confusion matrix and statistics
ad_tda_kde_5.40.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): this prints the same confusion matrix a second time; the
# output that follows is identical to the one printed just above.
ad_tda_kde_5.40.5_n2_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics; the accuracy is kept for the model comparison
ad_tda_kde_5.40.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Select by name rather than position so the right statistic is always stored
ad_tda_kde_5.40.5_n2_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_nn1_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 (positions 5:7 in the printout above), by name
ad_tda_kde_5.40.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Row-wise difference in per-fold accuracy: ad_nn1_fit_re minus the node-2
# TDA-assisted fit.
# NOTE(review): the subtraction pairs rows by position, so both inputs must
# list their folds in the same order -- confirm.
diff_tda_kde_5.40.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n2_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n2_3_fold
##      Accuracy
## 1  0.05283932
## 2 -0.03427454
## 3  0.01931733
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Sign test on the fold differences; -0.01 and 0.01 are passed as the bounds
# (presumably the ROPE limits -- confirm against the helper's definition)
bst_tda_kde_5.40.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n2_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test

# Ratio of the sign test's "left" probability to its "right" probability
bst_tda_kde_5.40.5_nn1.n2_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nn1.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n2_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nn1.n2_3_fold_odds.left
## [1] 0.5
# Bayesian Signed Rank Test

# Signed-rank test on the same fold differences, with the same two bounds
bsr_tda_kde_5.40.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n2_3_fold
## $winLeft
## [1] 0.1336667
## 
## $winRope
## [1] 0.3726
## 
## $winRight
## [1] 0.4937333
# Bayesian Correlated Test

# Correlated t-test on the fold differences; 0.1 is the second argument
# (presumably the correlation term -- confirm), followed by the two bounds
bct_tda_kde_5.40.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n2_3_fold
## $left
## [1] 0.2603248
## 
## $rope
## [1] 0.2080283
## 
## $right
## [1] 0.5316468
# Rope Plot
# Plot the fold differences against the [-0.01, 0.01] ROPE
plot(
  rope(diff_tda_kde_5.40.5_nn1_n2_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold))
#bf_tda_kde_5.40.5_nn1.n2_3_fold

#t_test
# One-sample t-test of the fold differences (default null: mean equal to 0)
t.test(as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold)
## t = 0.49775, df = 2, p-value = 0.668
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.09652703  0.12178177
## sample estimates:
##  mean of x 
## 0.01262737
### Test set diff
# Difference between the two stored test-set accuracies; this is one value,
# not a per-fold vector
diff_tda_kde_5.40.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n2_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n2_test
## Accuracy 
##        0
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference, with the same two bounds as
# the 3-fold comparison
bst_tda_kde_5.40.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test

# Ratio of the "left" to the "right" probability. Both are 0 in the output
# above, so this evaluates 0 / 0 and prints NaN.
bst_tda_kde_5.40.5_nn1.n2_test_odds.left <-
  bst_tda_kde_5.40.5_nn1.n2_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n2_test[["probRight"]]
bst_tda_kde_5.40.5_nn1.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

# Signed-rank test on the single test-set difference
bsr_tda_kde_5.40.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated t-test on the single test-set difference. The result printed
# below is NA for all three components.
# NOTE(review): presumably because one value has no variance -- confirm.
bct_tda_kde_5.40.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n2_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n2_test))
#bf_tda_kde_5.40.5_nn1.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n2_test))

##Node3

#Neural Network 1
# Fit a caret "nnet" classifier for adult_df1 on the node-3 data, selecting
# the model by Accuracy. The resampling scheme (fitControl) and tuning grid
# (nn1Grid) are defined outside this section.
# NOTE(review): most fits in the trace below end with "stopped after 100
# iterations", i.e. they hit the iteration cap rather than converging --
# consider whether a larger maxit is needed.
Adult_TDA_KDE_5.40.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  # NOTE(review): nnet() has no documented 'Importance' argument
  # (randomForest's is lowercase 'importance'), so this looks like it has no
  # effect -- confirm before removing. Spelled TRUE because T can be reassigned.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4284.963875 
## iter  10 value 4127.171279
## final  value 4127.170805 
## converged
## # weights:  331
## initial  value 4524.370765 
## iter  10 value 4098.857804
## iter  20 value 3867.528592
## iter  30 value 3787.424364
## iter  40 value 3708.899162
## iter  50 value 3681.789135
## iter  60 value 3547.279000
## iter  70 value 3005.076305
## iter  80 value 2901.935226
## iter  90 value 2635.381343
## iter 100 value 2478.673740
## final  value 2478.673740 
## stopped after 100 iterations
## # weights:  551
## initial  value 4294.354995 
## iter  10 value 3874.087330
## iter  20 value 3846.713162
## iter  30 value 3834.765428
## iter  40 value 3828.873553
## iter  50 value 3819.859015
## iter  60 value 3792.897364
## iter  70 value 3774.315636
## iter  80 value 3693.523612
## iter  90 value 3365.249146
## iter 100 value 2808.301249
## final  value 2808.301249 
## stopped after 100 iterations
## # weights:  771
## initial  value 4572.009160 
## iter  10 value 4123.842555
## iter  20 value 3869.310587
## iter  30 value 3804.440678
## iter  40 value 3762.472270
## iter  50 value 3758.172584
## iter  60 value 3720.406899
## iter  70 value 3718.412605
## iter  80 value 3693.444249
## iter  90 value 3673.376105
## iter 100 value 3635.147299
## final  value 3635.147299 
## stopped after 100 iterations
## # weights:  221
## initial  value 4113.022918 
## iter  10 value 3848.056877
## iter  20 value 3840.598819
## iter  30 value 3836.066823
## iter  40 value 3795.535036
## iter  50 value 3783.111060
## iter  60 value 3767.737887
## iter  70 value 3745.457228
## iter  80 value 3711.578681
## iter  90 value 3673.846608
## iter 100 value 3516.220359
## final  value 3516.220359 
## stopped after 100 iterations
## # weights:  331
## initial  value 4350.720645 
## iter  10 value 4094.103130
## iter  20 value 3926.643631
## iter  30 value 3925.374523
## iter  40 value 3906.240120
## iter  50 value 3849.655987
## iter  60 value 3832.518092
## iter  70 value 3785.533403
## iter  80 value 3507.244949
## iter  90 value 2975.819917
## iter 100 value 2799.453444
## final  value 2799.453444 
## stopped after 100 iterations
## # weights:  551
## initial  value 6938.510951 
## iter  10 value 4090.348767
## iter  20 value 3925.838714
## iter  30 value 3835.095293
## iter  40 value 3676.987123
## iter  50 value 3400.109678
## iter  60 value 3020.959504
## iter  70 value 2889.059047
## iter  80 value 2673.226083
## iter  90 value 2545.915454
## iter 100 value 2498.194141
## final  value 2498.194141 
## stopped after 100 iterations
## # weights:  771
## initial  value 6162.111056 
## iter  10 value 4026.539784
## iter  20 value 3879.263743
## iter  30 value 3779.802918
## iter  40 value 3761.242528
## iter  50 value 3749.893292
## iter  60 value 3561.180666
## iter  70 value 3255.175547
## iter  80 value 2848.295985
## iter  90 value 2685.853163
## iter 100 value 2663.797795
## final  value 2663.797795 
## stopped after 100 iterations
## # weights:  221
## initial  value 4695.935024 
## iter  10 value 4110.765981
## iter  20 value 3963.522646
## iter  30 value 3952.749159
## iter  40 value 3894.851720
## iter  50 value 3880.373644
## iter  60 value 3861.812484
## iter  70 value 3861.645184
## iter  80 value 3808.394874
## iter  90 value 3765.226962
## iter 100 value 3737.168828
## final  value 3737.168828 
## stopped after 100 iterations
## # weights:  331
## initial  value 4227.999486 
## iter  10 value 4127.482106
## iter  20 value 3914.390968
## iter  30 value 3859.910375
## iter  40 value 3768.350957
## iter  50 value 3755.341888
## iter  60 value 3717.729016
## iter  70 value 3703.943711
## iter  80 value 3690.983658
## iter  90 value 3684.704483
## iter 100 value 3676.404111
## final  value 3676.404111 
## stopped after 100 iterations
## # weights:  551
## initial  value 4323.901844 
## iter  10 value 4132.288525
## iter  20 value 4030.461842
## iter  30 value 3868.158921
## iter  40 value 3829.320389
## iter  50 value 3803.540615
## iter  60 value 3359.265011
## iter  70 value 2732.573018
## iter  80 value 2687.051141
## iter  90 value 2537.984469
## iter 100 value 2449.083739
## final  value 2449.083739 
## stopped after 100 iterations
## # weights:  771
## initial  value 5318.794661 
## iter  10 value 3892.179134
## iter  20 value 3829.431937
## iter  30 value 3781.001717
## iter  40 value 3758.943034
## iter  50 value 3750.745640
## iter  60 value 3745.957505
## iter  70 value 3725.500656
## iter  80 value 3680.274071
## iter  90 value 3670.272373
## iter 100 value 3665.606503
## final  value 3665.606503 
## stopped after 100 iterations
## # weights:  221
## initial  value 5202.800275 
## iter  10 value 4047.425161
## iter  20 value 3854.477378
## iter  30 value 3852.022530
## iter  40 value 3851.402976
## iter  40 value 3851.402941
## iter  50 value 3850.364328
## iter  60 value 3850.347352
## iter  70 value 3837.272104
## iter  80 value 3739.301836
## iter  90 value 3675.466125
## iter 100 value 3626.760281
## final  value 3626.760281 
## stopped after 100 iterations
## # weights:  331
## initial  value 5580.663131 
## iter  10 value 4127.000096
## iter  20 value 3878.122661
## iter  30 value 3839.358817
## iter  40 value 3766.240922
## iter  50 value 3732.734724
## iter  60 value 3688.598306
## iter  70 value 3632.106040
## iter  80 value 3568.298940
## iter  90 value 3562.772470
## iter 100 value 3473.217577
## final  value 3473.217577 
## stopped after 100 iterations
## # weights:  551
## initial  value 4406.001934 
## iter  10 value 4093.804802
## iter  20 value 3890.810320
## iter  30 value 3873.069513
## iter  40 value 3853.617247
## iter  50 value 3852.960106
## iter  50 value 3852.960094
## iter  60 value 3849.514413
## iter  70 value 3763.601897
## iter  80 value 3759.724088
## iter  90 value 3758.210317
## iter 100 value 3741.905631
## final  value 3741.905631 
## stopped after 100 iterations
## # weights:  771
## initial  value 4238.568334 
## iter  10 value 3941.257057
## iter  20 value 3867.850595
## iter  30 value 3861.631353
## iter  40 value 3848.475616
## iter  50 value 3843.855029
## iter  60 value 3840.778150
## iter  70 value 3775.611123
## iter  80 value 3741.293767
## iter  90 value 3720.502355
## iter 100 value 3704.268451
## final  value 3704.268451 
## stopped after 100 iterations
## # weights:  221
## initial  value 4149.442533 
## iter  10 value 4023.068192
## iter  20 value 3794.712880
## iter  30 value 3742.239143
## iter  40 value 3722.376086
## iter  50 value 3718.252005
## iter  60 value 3699.894223
## iter  70 value 3684.029733
## iter  80 value 3654.770737
## iter  90 value 3599.013302
## iter 100 value 3205.634089
## final  value 3205.634089 
## stopped after 100 iterations
## # weights:  331
## initial  value 5853.357015 
## iter  10 value 4126.940624
## iter  20 value 3983.410001
## iter  30 value 3823.835566
## iter  40 value 3803.488634
## iter  50 value 3772.297147
## iter  60 value 3769.949787
## iter  70 value 3767.490100
## iter  80 value 3766.545343
## iter  90 value 3766.130859
## iter 100 value 3765.692043
## final  value 3765.692043 
## stopped after 100 iterations
## # weights:  551
## initial  value 5501.392123 
## iter  10 value 3986.849730
## iter  20 value 3851.949155
## iter  30 value 3809.592306
## iter  40 value 3767.763868
## iter  50 value 3757.711060
## iter  60 value 3744.438509
## iter  70 value 3726.783441
## iter  80 value 3669.591159
## iter  90 value 3523.077368
## iter 100 value 3447.765363
## final  value 3447.765363 
## stopped after 100 iterations
## # weights:  771
## initial  value 4315.642672 
## iter  10 value 4098.630220
## iter  20 value 4095.485797
## iter  30 value 3823.705345
## iter  40 value 3757.079152
## iter  50 value 3749.321834
## iter  60 value 3712.967373
## iter  70 value 3591.146058
## iter  80 value 3486.291769
## iter  90 value 3152.364398
## iter 100 value 2978.220547
## final  value 2978.220547 
## stopped after 100 iterations
## # weights:  221
## initial  value 4920.702395 
## iter  10 value 4089.027364
## iter  20 value 3880.531376
## iter  30 value 3808.984480
## iter  40 value 3711.326485
## iter  50 value 3677.916574
## iter  60 value 3573.468188
## iter  70 value 3326.116119
## iter  80 value 3165.755182
## iter  90 value 3021.158202
## iter 100 value 2822.979030
## final  value 2822.979030 
## stopped after 100 iterations
## # weights:  331
## initial  value 4272.888430 
## iter  10 value 3895.353038
## iter  20 value 3813.637797
## iter  30 value 3778.568556
## iter  40 value 3758.857236
## iter  50 value 3736.489930
## iter  60 value 3722.652363
## iter  70 value 3694.370844
## iter  80 value 3633.894809
## iter  90 value 3453.474565
## iter 100 value 2909.602849
## final  value 2909.602849 
## stopped after 100 iterations
## # weights:  551
## initial  value 6154.246132 
## iter  10 value 4128.446705
## iter  20 value 4126.904870
## iter  30 value 4121.483228
## iter  40 value 4052.098661
## iter  50 value 3864.526061
## iter  60 value 3851.788141
## iter  70 value 3819.674344
## iter  80 value 3779.223130
## iter  90 value 3761.463938
## iter 100 value 3645.183824
## final  value 3645.183824 
## stopped after 100 iterations
## # weights:  771
## initial  value 8663.008833 
## iter  10 value 4099.203777
## iter  20 value 3811.540984
## iter  30 value 3771.878962
## iter  40 value 3541.768057
## iter  50 value 3289.864329
## iter  60 value 2912.533544
## iter  70 value 2601.208106
## iter  80 value 2565.665042
## iter  90 value 2551.590029
## iter 100 value 2495.214685
## final  value 2495.214685 
## stopped after 100 iterations
## # weights:  221
## initial  value 4965.137410 
## iter  10 value 4127.283651
## iter  20 value 3845.386550
## iter  30 value 3800.529844
## iter  40 value 3796.491706
## iter  50 value 3787.779756
## iter  60 value 3769.172549
## iter  70 value 3741.947414
## iter  80 value 3733.434110
## iter  90 value 3718.522029
## iter 100 value 3703.984975
## final  value 3703.984975 
## stopped after 100 iterations
## # weights:  331
## initial  value 5805.953376 
## iter  10 value 4125.312795
## iter  20 value 3994.256242
## iter  30 value 3784.273013
## iter  40 value 3773.739414
## iter  50 value 3723.993945
## iter  60 value 3624.082097
## iter  70 value 2989.123574
## iter  80 value 2720.554871
## iter  90 value 2570.510767
## iter 100 value 2412.110592
## final  value 2412.110592 
## stopped after 100 iterations
## # weights:  551
## initial  value 5077.329769 
## iter  10 value 4127.139574
## final  value 4127.137274 
## converged
## # weights:  771
## initial  value 4222.130083 
## iter  10 value 3868.341544
## iter  20 value 3835.755599
## iter  30 value 3832.681920
## iter  40 value 3832.075712
## iter  50 value 3811.838846
## iter  60 value 3747.902815
## iter  70 value 3708.906488
## iter  80 value 3507.557008
## iter  90 value 3113.771536
## iter 100 value 2998.867254
## final  value 2998.867254 
## stopped after 100 iterations
## # weights:  221
## initial  value 4596.505391 
## iter  10 value 4114.142239
## iter  20 value 4100.978618
## iter  30 value 3825.983353
## iter  40 value 3788.502085
## iter  50 value 3727.715333
## iter  60 value 3698.055999
## iter  70 value 3664.226434
## iter  80 value 3655.982689
## iter  90 value 3606.221814
## iter 100 value 3510.205156
## final  value 3510.205156 
## stopped after 100 iterations
## # weights:  331
## initial  value 4283.856703 
## iter  10 value 4100.910182
## iter  20 value 3873.487416
## iter  30 value 3871.357206
## iter  40 value 3870.893123
## iter  50 value 3813.568735
## iter  60 value 3725.229402
## iter  70 value 3674.093889
## iter  80 value 3664.532453
## iter  90 value 3500.516458
## iter 100 value 3129.353137
## final  value 3129.353137 
## stopped after 100 iterations
## # weights:  551
## initial  value 4181.766311 
## iter  10 value 4128.560904
## iter  20 value 4127.311556
## iter  30 value 4127.297152
## final  value 4127.297057 
## converged
## # weights:  771
## initial  value 4301.684690 
## iter  10 value 4047.754001
## iter  20 value 3858.900901
## iter  30 value 3855.938766
## iter  40 value 3847.513740
## iter  50 value 3843.654602
## iter  60 value 3758.511256
## iter  70 value 3745.988378
## iter  80 value 3730.897838
## iter  90 value 3700.306316
## iter 100 value 3688.076635
## final  value 3688.076635 
## stopped after 100 iterations
## # weights:  221
## initial  value 4247.270749 
## iter  10 value 4050.397137
## iter  20 value 3838.755899
## iter  30 value 3837.620818
## final  value 3837.076827 
## converged
## # weights:  331
## initial  value 5339.979424 
## iter  10 value 4127.048943
## iter  20 value 4126.218238
## iter  30 value 3917.853054
## iter  40 value 3806.296101
## iter  50 value 3784.460481
## iter  60 value 3748.319620
## iter  70 value 3740.299518
## iter  80 value 3737.486576
## iter  90 value 3734.712865
## iter 100 value 3718.296732
## final  value 3718.296732 
## stopped after 100 iterations
## # weights:  551
## initial  value 5519.487886 
## iter  10 value 4097.049493
## iter  20 value 3860.228894
## iter  30 value 3841.875110
## iter  40 value 3824.159833
## iter  50 value 3822.988143
## iter  60 value 3822.874562
## iter  70 value 3805.451554
## iter  80 value 3729.572345
## iter  90 value 3709.366330
## iter 100 value 3690.409458
## final  value 3690.409458 
## stopped after 100 iterations
## # weights:  771
## initial  value 7655.826742 
## iter  10 value 4105.427999
## iter  20 value 3860.911668
## iter  30 value 3841.403289
## iter  40 value 3766.192253
## iter  50 value 3717.641773
## iter  60 value 3635.186838
## iter  70 value 3437.761668
## iter  80 value 2992.388826
## iter  90 value 2842.432785
## iter 100 value 2740.019011
## final  value 2740.019011 
## stopped after 100 iterations
## # weights:  331
## initial  value 6949.098549 
## iter  10 value 5881.219428
## iter  20 value 5801.594500
## iter  30 value 5761.688110
## iter  40 value 5697.122456
## iter  50 value 5558.872204
## iter  60 value 5456.406202
## iter  70 value 5296.305274
## iter  80 value 5056.530139
## iter  90 value 4471.404179
## iter 100 value 3978.123817
## final  value 3978.123817 
## stopped after 100 iterations
# Print the caret fit for node 3: resampled Accuracy/Kappa for every
# size-decay combination in the tuning grid, plus the combination selected.
Adult_TDA_KDE_5.40.5_n3_NN1Fit0
## Neural Network 
## 
## 10351 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6901, 6900, 6901 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7491046  0.1925832
##   2     0.5    0.7763478  0.3647208
##   2     0.7    0.7770220  0.3497923
##   3     0.3    0.8063984  0.4840893
##   3     0.5    0.7860137  0.4246793
##   3     0.7    0.7706494  0.3054891
##   5     0.3    0.7581874  0.2636821
##   5     0.5    0.7679447  0.2795026
##   5     0.7    0.7821492  0.3636602
##   7     0.3    0.7626316  0.3356978
##   7     0.5    0.7938351  0.4532893
##   7     0.7    0.8005960  0.4742076
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold Accuracy and Kappa for the selected size/decay combination.
Adult_TDA_KDE_5.40.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8133333 0.5609343    Fold3
## 2 0.7765865 0.3114555    Fold2
## 3 0.8292754 0.5798781    Fold1
# Keep only the Accuracy column (still a one-column data frame) so it can be
# subtracted from the other model's per-fold accuracies later on.
ad_tda_kde_5.40.5_n3_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n3_NN1Fit0$resample["Accuracy"]

# Show the structure and weights of the final network.
summary(Adult_TDA_KDE_5.40.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -1.46     0.11    -0.58     0.92    -0.13     0.00     0.22    -0.14 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -1.32    -0.38    -0.05     0.00     0.00     0.00    -0.99     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00    -0.11    -0.42     1.18     0.00    -1.85 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     1.89     0.00     0.00    -1.16    -0.30    -0.67     0.18     1.51 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.25    -1.38    -0.75    -0.10    -0.58     0.14    -0.02     0.27 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.96    -1.36    -0.96    -1.12    -1.55    -0.09     0.96     0.82 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.91     0.73    -0.58    -0.70     0.12    -0.58    -2.02    -1.03 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     2.75    -0.31     0.14    -0.49    -0.43    -0.36    -1.00    -0.46 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.09     0.00     0.03    -0.57     0.14     0.09    -0.22    -0.09 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.11    -0.04    -0.13    -0.03     0.04     0.05    -0.02    -0.14 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.03     0.02     0.00    -0.01    -0.01    -0.07    -0.06    -0.04 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.03    -0.04    -0.03     0.13    -0.03    -0.41     0.00    -0.01 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.07     0.42    -0.08    -0.01    -0.27    -0.05    -0.17    -0.10 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.01     0.00     0.42    -0.12    -0.06 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.08    -0.96    -0.08     0.10    -0.27     0.00     0.63    -0.01 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.51     0.05     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.03     0.04    -0.08     0.00    -0.17 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.25     0.00     0.00    -0.15     0.21    -0.24    -0.01    -0.08 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.03     0.20    -0.01     0.03    -0.08     0.11     0.00    -0.11 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.51    -0.21    -0.06    -0.05    -0.11     0.00    -0.15    -0.04 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.03     0.04     0.02    -0.13     0.10    -0.06     0.06    -0.13 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.08    -0.01    -0.09    -0.22     0.01     0.24    -0.07    -0.01 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.31    -0.32     0.04     0.00    -0.02     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.01     0.02     0.00    -0.02    -0.02    -0.02     0.05     0.01 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00    -0.02     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00    -0.01    -0.01    -0.05     0.00     0.01     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00    -0.03     0.03     0.02     0.02    -0.02     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00    -0.01    -0.07     0.02     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.06     0.01    -0.21    -0.02    -0.32    -0.01     0.10    -0.09 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.24     0.29     0.09     0.00     0.00     0.00     0.20     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00    -0.09    -0.17     0.46     0.00    -0.21 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##    -0.29     0.00     0.00     0.16     0.27     0.17    -0.02     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##    -0.01    -0.34     0.27    -0.02    -0.22    -0.09     0.00     0.53 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -0.09     0.59     0.14    -0.21     0.08     0.04     0.31    -0.28 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.03    -0.28    -0.51    -0.54     0.48     0.17    -0.10    -0.34 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.40     0.02     0.17    -0.11     0.06    -0.08     0.24    -0.18 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.02     0.02     0.16    -0.03    -0.02     0.03     0.02 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##    -0.03     0.02     0.02     0.05    -0.02    -0.02    -0.07     0.02 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.02     0.01     0.02 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00    -0.07     0.02    -0.02     0.00     0.04    -0.02     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.01    -0.08     0.01    -0.02     0.06     0.00     0.02     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00    -0.27     0.18     0.01 
##  b->o h1->o h2->o h3->o 
##  2.33  3.79  0.94 -6.02
# Variable-importance plot limited to 50 features; the count is passed by name
# so the bare number is not mistaken for another argument.
vip(Adult_TDA_KDE_5.40.5_n3_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n3_NN1Fit TDA-Assisted NN")

# Predict outcome using Adult_TDA_KDE_5.40.5_n3_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.40.5_n3_NN1Fit0, newdata = adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# `data` holds the predicted classes, `reference` the observed classes.
ad_tda_kde_5.40.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6417  1059
##      >50K     999  1293
##                                           
##                Accuracy : 0.7893          
##                  95% CI : (0.7811, 0.7974)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 9.018e-13       
##                                           
##                   Kappa : 0.4187          
##                                           
##  Mcnemar's Test P-Value : 0.1934          
##                                           
##             Sensitivity : 0.8653          
##             Specificity : 0.5497          
##          Pos Pred Value : 0.8583          
##          Neg Pred Value : 0.5641          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6569          
##    Detection Prevalence : 0.7654          
##       Balanced Accuracy : 0.7075          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion matrix object; it was already printed
# right after it was created, so the output that follows is a repeat.
ad_tda_kde_5.40.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6417  1059
##      >50K     999  1293
##                                           
##                Accuracy : 0.7893          
##                  95% CI : (0.7811, 0.7974)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 9.018e-13       
##                                           
##                   Kappa : 0.4187          
##                                           
##  Mcnemar's Test P-Value : 0.1934          
##                                           
##             Sensitivity : 0.8653          
##             Specificity : 0.5497          
##          Pos Pred Value : 0.8583          
##          Neg Pred Value : 0.5641          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6569          
##    Detection Prevalence : 0.7654          
##       Balanced Accuracy : 0.7075          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the confusion matrix.
ad_tda_kde_5.40.5_n3_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.893120e-01   4.186828e-01   7.810881e-01   7.973631e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.018235e-13   1.934102e-01
# Keep the test-set accuracy (first element of `overall`), selected by name.
ad_tda_kde_5.40.5_n3_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_nn1_cf0$overall["Accuracy"]

# Per-class statistics of the confusion matrix.
ad_tda_kde_5.40.5_n3_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8652913            0.5497449            0.8583467 
##       Neg Pred Value            Precision               Recall 
##            0.5641361            0.8583467            0.8652913 
##                   F1           Prevalence       Detection Rate 
##            0.8618050            0.7592138            0.6569410 
## Detection Prevalence    Balanced Accuracy 
##            0.7653563            0.7075181
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`), selected by name.
ad_tda_kde_5.40.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Row-wise difference of per-fold accuracies: ad_nn1_fit_re minus the
# TDA-assisted node-3 NN. Positive values mean ad_nn1_fit_re scored higher.
# NOTE(review): rows are paired by position, not by fold label -- confirm that
# both resample tables list their folds in the same order.
diff_tda_kde_5.40.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n3_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n3_3_fold
##      Accuracy
## 1  0.03660743
## 2  0.01638534
## 3 -0.01221824
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The result has three elements: probLeft, probRope and probRight.
# NOTE(review): -0.01 and 0.01 are presumably the lower and upper ROPE bounds
# -- confirm against the definition of BayesianSignTest.
bst_tda_kde_5.40.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n3_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability.
bst_tda_kde_5.40.5_nn1.n3_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nn1.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n3_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nn1.n3_3_fold_odds.left
## [1] 0.5
# Bayesian Signed Rank Test
# Same input matrix and the same two bounds as the sign test; the result has
# the elements winLeft, winRope and winRight.
bsr_tda_kde_5.40.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n3_3_fold
## $winLeft
## [1] 0.08056667
## 
## $winRope
## [1] 0.2424667
## 
## $winRight
## [1] 0.6769667
# Bayesian Correlated Test
# The result has the elements left, rope and right.
# NOTE(review): the second argument (0.1) is presumably the assumed correlation
# between folds and the last two the ROPE bounds -- confirm against the
# definition of correlatedBayesianTtest.
bct_tda_kde_5.40.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n3_3_fold
## $left
## [1] 0.1429672
## 
## $rope
## [1] 0.2803128
## 
## $right
## [1] 0.57672
# Rope Plot
# Plot the rope() result for the fold-wise differences over [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.40.5_nn1_n3_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold))
#bf_tda_kde_5.40.5_nn1.n3_3_fold

#t_test
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold)
## t = 0.95959, df = 2, p-value = 0.4385
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.04735046  0.07453348
## sample estimates:
##  mean of x 
## 0.01359151
### Test set diff
# Difference in test-set accuracy: nn1_cf_ov_acc minus the accuracy of the
# TDA-assisted node-3 NN. The result is a single named number.
diff_tda_kde_5.40.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n3_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n3_test
##    Accuracy 
## -0.03009828
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the single test-set accuracy difference (a 1 x 1 matrix).
bst_tda_kde_5.40.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability. With probRight
# equal to 0, as in the recorded run, this evaluates to Inf.
bst_tda_kde_5.40.5_nn1.n3_test_odds.left <-
  bst_tda_kde_5.40.5_nn1.n3_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n3_test[["probRight"]]
bst_tda_kde_5.40.5_nn1.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same single-value input and bounds as the sign test on the test-set diff;
# the result has the elements winLeft, winRope and winRight.
bsr_tda_kde_5.40.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n3_test
## $winLeft
## [1] 0.8431
## 
## $winRope
## [1] 0.1569
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is a single difference and the recorded result is NA
# for left, rope and right -- presumably because no spread can be estimated
# from one value. Confirm whether this test is meaningful for the test-set diff.
bct_tda_kde_5.40.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n3_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n3_test))
#bf_tda_kde_5.40.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n3_test))

##Node4

#Neural Network 1
# Fit the node-4 TDA-assisted neural network with caret: tune an nnet model
# over nn1Grid using the resampling scheme in fitControl, and select the
# final model by Accuracy.
# NOTE(review): `Importance` is forwarded through `...` to the underlying fit;
# it is not a documented nnet() argument, so it likely has no effect -- confirm
# before relying on it.
Adult_TDA_KDE_5.40.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 6532.964301 
## iter  10 value 2805.881588
## iter  20 value 2703.185477
## iter  30 value 2552.622009
## iter  40 value 2290.647844
## iter  50 value 2147.218792
## iter  60 value 2124.117632
## iter  70 value 2084.992182
## iter  80 value 1739.556185
## iter  90 value 1675.863666
## iter 100 value 1643.989968
## final  value 1643.989968 
## stopped after 100 iterations
## # weights:  331
## initial  value 4023.815010 
## iter  10 value 2876.361109
## iter  20 value 2810.086908
## iter  30 value 2809.873229
## iter  40 value 2743.131163
## iter  50 value 2737.872235
## iter  60 value 2703.971046
## iter  70 value 2537.772828
## iter  80 value 2498.965736
## iter  90 value 2457.520583
## iter 100 value 2357.392411
## final  value 2357.392411 
## stopped after 100 iterations
## # weights:  551
## initial  value 3068.833032 
## iter  10 value 2926.491576
## iter  20 value 2817.992087
## iter  30 value 2716.534591
## iter  40 value 2711.834055
## iter  50 value 2706.008801
## iter  60 value 2652.029328
## iter  70 value 2642.422282
## iter  80 value 2638.941754
## iter  90 value 2625.735792
## iter 100 value 2551.406967
## final  value 2551.406967 
## stopped after 100 iterations
## # weights:  771
## initial  value 3991.587074 
## iter  10 value 2923.271181
## iter  20 value 2772.191798
## iter  30 value 2741.592051
## iter  40 value 2733.623021
## iter  50 value 2697.572746
## iter  60 value 2668.422038
## iter  70 value 2564.888047
## iter  80 value 2538.141864
## iter  90 value 2444.348867
## iter 100 value 2247.630005
## final  value 2247.630005 
## stopped after 100 iterations
## # weights:  221
## initial  value 3467.934860 
## iter  10 value 2924.813070
## iter  20 value 2899.272947
## iter  30 value 2898.625741
## iter  30 value 2898.625734
## iter  40 value 2895.778152
## iter  50 value 2834.937279
## iter  60 value 2749.925467
## iter  70 value 2596.938753
## iter  80 value 2282.917064
## iter  90 value 2092.401815
## iter 100 value 1964.821872
## final  value 1964.821872 
## stopped after 100 iterations
## # weights:  331
## initial  value 3160.003171 
## iter  10 value 2926.767964
## iter  20 value 2926.003093
## iter  30 value 2725.103440
## iter  40 value 2684.623734
## iter  50 value 2671.889828
## iter  60 value 2642.782898
## iter  70 value 2611.531914
## iter  80 value 2591.246457
## iter  90 value 2360.987528
## iter 100 value 2175.744076
## final  value 2175.744076 
## stopped after 100 iterations
## # weights:  551
## initial  value 4490.226373 
## iter  10 value 2941.718312
## iter  20 value 2941.684870
## iter  30 value 2941.579352
## iter  40 value 2771.271664
## iter  50 value 2767.536558
## iter  60 value 2757.481162
## iter  70 value 2754.927016
## iter  80 value 2753.674761
## iter  90 value 2742.193336
## final  value 2733.204184 
## converged
## # weights:  771
## initial  value 5626.314344 
## iter  10 value 2797.766563
## iter  20 value 2753.662710
## iter  30 value 2736.469589
## iter  40 value 2728.263802
## iter  50 value 2715.968716
## iter  60 value 2705.452948
## iter  70 value 2701.114964
## iter  80 value 2699.279865
## iter  90 value 2697.259925
## iter 100 value 2687.959242
## final  value 2687.959242 
## stopped after 100 iterations
## # weights:  221
## initial  value 4292.873784 
## iter  10 value 2892.243441
## iter  20 value 2839.321422
## iter  30 value 2834.803905
## iter  40 value 2737.095326
## iter  50 value 2586.729322
## iter  60 value 2412.429338
## iter  70 value 2194.116588
## iter  80 value 2001.810612
## iter  90 value 1890.887861
## iter 100 value 1838.025779
## final  value 1838.025779 
## stopped after 100 iterations
## # weights:  331
## initial  value 3658.265907 
## iter  10 value 2929.367629
## iter  20 value 2753.478480
## iter  30 value 2719.093915
## iter  40 value 2679.800278
## iter  50 value 2675.092508
## iter  60 value 2671.921339
## iter  60 value 2671.921326
## final  value 2671.911947 
## converged
## # weights:  551
## initial  value 3972.781419 
## iter  10 value 2931.271567
## iter  20 value 2734.440426
## iter  30 value 2722.195104
## iter  40 value 2713.997901
## iter  50 value 2709.070407
## iter  60 value 2708.175113
## iter  70 value 2704.282712
## iter  80 value 2662.629499
## iter  90 value 2653.846772
## iter 100 value 2490.106090
## final  value 2490.106090 
## stopped after 100 iterations
## # weights:  771
## initial  value 6277.153227 
## iter  10 value 2947.340059
## iter  20 value 2843.559539
## iter  30 value 2324.406739
## iter  40 value 2043.989335
## iter  50 value 1841.743362
## iter  60 value 1731.962716
## iter  70 value 1718.626033
## iter  80 value 1709.047014
## iter  90 value 1705.273831
## iter 100 value 1704.738628
## final  value 1704.738628 
## stopped after 100 iterations
## # weights:  221
## initial  value 3710.591361 
## iter  10 value 2914.647417
## iter  20 value 2761.703044
## iter  30 value 2742.084912
## iter  40 value 2738.526729
## iter  50 value 2733.210459
## iter  60 value 2708.006080
## iter  70 value 2543.731213
## iter  80 value 2496.321884
## iter  90 value 2429.214709
## iter 100 value 2123.727564
## final  value 2123.727564 
## stopped after 100 iterations
## # weights:  331
## initial  value 3013.248408 
## iter  10 value 2765.792460
## iter  20 value 2720.375882
## iter  30 value 2695.401475
## iter  40 value 2653.588683
## iter  50 value 2603.079192
## iter  60 value 2588.789624
## iter  70 value 2583.762881
## iter  80 value 2552.074766
## iter  90 value 2295.182232
## iter 100 value 1895.173971
## final  value 1895.173971 
## stopped after 100 iterations
## # weights:  551
## initial  value 7794.113473 
## iter  10 value 3197.150737
## iter  20 value 2909.182277
## iter  30 value 2752.469953
## iter  40 value 2746.646982
## iter  50 value 2743.757349
## iter  60 value 2691.068531
## iter  70 value 2680.097901
## iter  80 value 2676.505460
## iter  90 value 2675.090978
## iter 100 value 2669.987823
## final  value 2669.987823 
## stopped after 100 iterations
## # weights:  771
## initial  value 4369.541185 
## iter  10 value 2920.277576
## iter  20 value 2920.225588
## iter  30 value 2765.071040
## iter  40 value 2735.821599
## iter  50 value 2733.225112
## iter  60 value 2717.311049
## iter  70 value 2683.968972
## iter  80 value 2676.933215
## iter  90 value 2675.911699
## iter 100 value 2671.515481
## final  value 2671.515481 
## stopped after 100 iterations
## # weights:  221
## initial  value 5018.836023 
## iter  10 value 2942.626098
## iter  20 value 2940.570165
## iter  30 value 2940.545769
## final  value 2940.545492 
## converged
## # weights:  331
## initial  value 4983.187100 
## iter  10 value 2937.848614
## iter  20 value 2931.472435
## iter  30 value 2923.290864
## iter  40 value 2922.358603
## iter  50 value 2843.928168
## iter  60 value 2751.179649
## iter  70 value 2715.096182
## iter  80 value 2709.532981
## iter  90 value 2677.924683
## iter 100 value 2653.949824
## final  value 2653.949824 
## stopped after 100 iterations
## # weights:  551
## initial  value 5223.923265 
## iter  10 value 2927.398446
## iter  20 value 2795.268037
## iter  30 value 2716.556940
## iter  40 value 2692.077107
## iter  50 value 2654.101050
## iter  60 value 2614.996443
## iter  70 value 2562.546301
## iter  80 value 2392.452623
## iter  90 value 2115.166437
## iter 100 value 1976.956488
## final  value 1976.956488 
## stopped after 100 iterations
## # weights:  771
## initial  value 4029.823179 
## iter  10 value 2750.927833
## iter  20 value 2723.998486
## iter  30 value 2715.893733
## iter  40 value 2701.742322
## iter  50 value 2680.343554
## iter  60 value 2652.183938
## iter  70 value 2291.482005
## iter  80 value 2151.250171
## iter  90 value 2023.392525
## iter 100 value 1860.774320
## final  value 1860.774320 
## stopped after 100 iterations
## # weights:  221
## initial  value 5500.634692 
## final  value 2940.732281 
## converged
## # weights:  331
## initial  value 3474.845338 
## iter  10 value 2940.815204
## iter  20 value 2761.223674
## iter  30 value 2744.154850
## iter  40 value 2741.964914
## iter  50 value 2736.821373
## iter  60 value 2710.399584
## iter  70 value 2705.434988
## iter  80 value 2703.434529
## iter  90 value 2682.373715
## iter 100 value 2626.216382
## final  value 2626.216382 
## stopped after 100 iterations
## # weights:  551
## initial  value 3662.617453 
## iter  10 value 2940.513747
## iter  20 value 2940.361691
## iter  30 value 2933.945455
## iter  40 value 2724.728493
## iter  50 value 2682.908586
## iter  60 value 2680.914904
## iter  70 value 2675.664920
## iter  80 value 2670.749711
## iter  90 value 2663.805713
## iter 100 value 2637.126587
## final  value 2637.126587 
## stopped after 100 iterations
## # weights:  771
## initial  value 3018.652311 
## iter  10 value 2925.482814
## iter  20 value 2878.899941
## iter  30 value 2691.559386
## iter  40 value 2644.407804
## iter  50 value 2350.974948
## iter  60 value 2145.019496
## iter  70 value 2018.620001
## iter  80 value 1920.574699
## iter  90 value 1865.929761
## iter 100 value 1835.621093
## final  value 1835.621093 
## stopped after 100 iterations
## # weights:  221
## initial  value 3806.029802 
## iter  10 value 2941.923399
## iter  20 value 2941.901001
## final  value 2941.860794 
## converged
## # weights:  331
## initial  value 4826.840308 
## iter  10 value 2860.964472
## iter  20 value 2753.692673
## iter  30 value 2750.718594
## final  value 2750.714503 
## converged
## # weights:  551
## initial  value 5640.551877 
## iter  10 value 2774.799889
## iter  20 value 2709.819983
## iter  30 value 2704.645047
## iter  40 value 2691.512304
## iter  50 value 2648.757490
## iter  60 value 2615.966101
## iter  70 value 2560.010005
## iter  80 value 1996.414639
## iter  90 value 1769.556591
## iter 100 value 1687.538304
## final  value 1687.538304 
## stopped after 100 iterations
## # weights:  771
## initial  value 12693.449186 
## iter  10 value 2988.427156
## iter  20 value 2939.090864
## iter  30 value 2752.473583
## iter  40 value 2733.693127
## iter  50 value 2725.702042
## iter  60 value 2684.639643
## iter  70 value 2627.949018
## iter  80 value 2361.287274
## iter  90 value 2076.744703
## iter 100 value 2000.684748
## final  value 2000.684748 
## stopped after 100 iterations
## # weights:  221
## initial  value 3647.509272 
## iter  10 value 2943.629215
## iter  20 value 2942.156532
## iter  30 value 2942.139221
## final  value 2942.139030 
## converged
## # weights:  331
## initial  value 3292.369753 
## iter  10 value 2840.100390
## iter  20 value 2695.801780
## iter  30 value 2590.605767
## iter  40 value 2029.784024
## iter  50 value 1917.086917
## iter  60 value 1863.745849
## iter  70 value 1764.161329
## iter  80 value 1720.177993
## iter  90 value 1719.347575
## iter 100 value 1699.916153
## final  value 1699.916153 
## stopped after 100 iterations
## # weights:  551
## initial  value 5078.886991 
## iter  10 value 2937.337866
## iter  20 value 2753.091517
## iter  30 value 2750.767045
## iter  40 value 2741.229374
## iter  50 value 2692.284691
## iter  60 value 2672.060026
## iter  70 value 2662.480175
## iter  80 value 2660.073665
## iter  90 value 2655.054986
## iter 100 value 2651.882535
## final  value 2651.882535 
## stopped after 100 iterations
## # weights:  771
## initial  value 4485.928875 
## iter  10 value 3038.202290
## iter  20 value 2783.850728
## iter  30 value 2782.228758
## iter  40 value 2780.474225
## iter  50 value 2757.622095
## iter  60 value 2713.762779
## iter  70 value 2627.651319
## iter  80 value 2098.826114
## iter  90 value 1893.981739
## iter 100 value 1771.684758
## final  value 1771.684758 
## stopped after 100 iterations
## # weights:  221
## initial  value 3808.355360 
## iter  10 value 2857.140804
## iter  20 value 2781.978761
## iter  30 value 2752.017874
## iter  40 value 2745.162472
## iter  50 value 2730.616655
## iter  60 value 2682.014932
## iter  70 value 2531.661446
## iter  80 value 2272.805931
## iter  90 value 2004.265642
## iter 100 value 1797.527195
## final  value 1797.527195 
## stopped after 100 iterations
## # weights:  331
## initial  value 6378.533935 
## iter  10 value 2809.075245
## iter  20 value 2806.997469
## iter  30 value 2747.112798
## iter  40 value 2745.087822
## iter  50 value 2738.993852
## iter  60 value 2683.225272
## iter  70 value 2516.343527
## iter  80 value 2436.482072
## iter  90 value 2414.810541
## iter 100 value 2410.513213
## final  value 2410.513213 
## stopped after 100 iterations
## # weights:  551
## initial  value 3733.664642 
## iter  10 value 2928.573538
## iter  20 value 2790.589060
## iter  30 value 2739.450658
## iter  40 value 2737.950984
## iter  50 value 2729.869124
## iter  50 value 2729.869116
## iter  60 value 2698.365900
## iter  70 value 2695.631084
## iter  80 value 2693.779655
## iter  90 value 2689.898157
## iter 100 value 2678.358111
## final  value 2678.358111 
## stopped after 100 iterations
## # weights:  771
## initial  value 3055.694952 
## iter  10 value 2943.953111
## iter  20 value 2767.535307
## iter  30 value 2761.711507
## iter  40 value 2730.402311
## iter  50 value 2719.556533
## iter  60 value 2717.563317
## iter  70 value 2716.163500
## iter  80 value 2708.372059
## iter  90 value 2707.759608
## iter 100 value 2706.416346
## final  value 2706.416346 
## stopped after 100 iterations
## # weights:  331
## initial  value 5445.830102 
## iter  10 value 4299.946166
## iter  20 value 4206.527572
## iter  30 value 4124.525237
## iter  40 value 4121.712924
## iter  50 value 4115.754227
## iter  60 value 4068.216961
## iter  70 value 4052.408805
## iter  80 value 4036.997484
## iter  90 value 3985.715896
## iter 100 value 3781.278838
## final  value 3781.278838 
## stopped after 100 iterations
# Print the caret fit for node 4: resampled Accuracy/Kappa for every
# size-decay combination in the tuning grid, plus the combination selected.
Adult_TDA_KDE_5.40.5_n4_NN1Fit0
## Neural Network 
## 
## 8741 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5827, 5827, 5828 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8283913  0.2528047
##   2     0.5    0.8084874  0.1820410
##   2     0.7    0.8375488  0.3638547
##   3     0.3    0.8237040  0.3006314
##   3     0.5    0.8469303  0.4170242
##   3     0.7    0.8210731  0.2186062
##   5     0.3    0.8372074  0.3340522
##   5     0.5    0.8209591  0.2605409
##   5     0.7    0.8202726  0.2349527
##   7     0.3    0.8123797  0.1568151
##   7     0.5    0.8412098  0.4031067
##   7     0.7    0.8366298  0.4191392
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.5.
# Show the per-fold Accuracy/Kappa table stored on the fit.
Adult_TDA_KDE_5.40.5_n4_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8650875 0.5644840    Fold3
## 2 0.8239533 0.2059136    Fold2
## 3 0.8517502 0.4806750    Fold1
# Keep only the Accuracy column (still a one-column data frame); it is the
# input to the fold-wise difference computed later.
ad_tda_kde_5.40.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_NN1Fit0[["resample"]]["Accuracy"]

# Print the architecture and weights of the final network.
summary(Adult_TDA_KDE_5.40.5_n4_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.59    -0.02    -0.14     0.21     0.07    -0.01    -0.08     0.13 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.33     0.11    -0.02     0.00     0.00     0.00     0.70     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00    -0.33    -0.27    -0.79     0.00     0.91 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.37     0.31     0.14     0.03     0.69 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.08    -0.04    -0.18     0.02    -0.16     0.47     0.00     0.04 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -0.36     0.06     0.18    -0.18     0.25     0.17    -0.01     0.02 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.01    -0.10     0.21    -1.81     0.10     0.17    -0.23    -0.18 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     2.54     0.32     0.01     0.08     0.15     0.03     0.27     0.32 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.01    -0.01     0.37    -0.08    -0.13     0.13     0.10 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.01     0.00     0.02    -0.03     0.32    -0.03     0.00     0.31 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.18    -0.03     0.00    -0.01    -0.01     0.00    -0.01    -0.01 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.28     0.20     0.03     0.02     0.02     0.11     0.02    -0.01 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.02    -0.34    -0.04     0.00     0.14    -0.03    -0.10     0.03 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.02    -0.03    -0.76    -0.02    -0.01 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.52    -0.15    -0.03    -0.52    -0.38    -0.01     0.92    -0.52 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.03     0.06     0.00     0.00     0.00     0.00     0.16     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00    -0.28     0.11     2.06     0.00    -0.18 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00    -2.38     0.42    -0.22    -0.03    -0.52 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.39     0.16    -0.10    -0.19    -0.05     0.28    -0.01    -0.32 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.22     0.03     0.02    -0.59    -0.70    -0.01     0.63    -0.39 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -0.14     0.37     0.14    -0.87     0.66     0.18    -0.87    -0.02 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.40    -0.18     0.02     0.15    -0.19    -0.32    -0.44    -0.08 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.35     0.00     0.11     0.10     0.02 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.26    -0.04    -0.01     0.08    -0.25     0.02    -0.02     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.01     0.05     0.00     0.02     0.00    -0.01    -0.01    -0.27 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -0.04    -0.13     0.16     0.05    -0.01     0.21     0.08     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.03    -0.02     0.00     0.05     0.07     0.00    -0.06    -0.01 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##    -0.06    -0.02    -0.71     0.03     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o 
##  3.67 -9.26  2.08  3.40
# Variable-importance plot for the fitted network, limited to 50 features.
# `num_features` is named explicitly instead of relying on argument position.
vip(Adult_TDA_KDE_5.40.5_n4_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n4_NN1Fit TDA-Assisted NN")

# Predict the outcome for the test rows with the network fitted above.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the test labels, then print the
# confusion matrix together with its summary statistics.
ad_tda_kde_5.40.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6091  1553
##      >50K    1325   799
##                                           
##                Accuracy : 0.7054          
##                  95% CI : (0.6962, 0.7144)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1666          
##                                           
##  Mcnemar's Test P-Value : 2.323e-05       
##                                           
##             Sensitivity : 0.8213          
##             Specificity : 0.3397          
##          Pos Pred Value : 0.7968          
##          Neg Pred Value : 0.3762          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6236          
##    Detection Prevalence : 0.7826          
##       Balanced Accuracy : 0.5805          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the output is
# identical to the print issued right after it was created.
ad_tda_kde_5.40.5_n4_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6091  1553
##      >50K    1325   799
##                                           
##                Accuracy : 0.7054          
##                  95% CI : (0.6962, 0.7144)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1666          
##                                           
##  Mcnemar's Test P-Value : 2.323e-05       
##                                           
##             Sensitivity : 0.8213          
##             Specificity : 0.3397          
##          Pos Pred Value : 0.7968          
##          Neg Pred Value : 0.3762          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6236          
##    Detection Prevalence : 0.7826          
##       Balanced Accuracy : 0.5805          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, interval bounds, p-values).
ad_tda_kde_5.40.5_n4_nn1_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.053645e-01   1.665557e-01   6.962124e-01   7.143939e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   2.322777e-05
# Store the test accuracy as a named, length-one numeric vector.
ad_tda_kde_5.40.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.40.5_n4_nn1_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8213323            0.3397109            0.7968341 
##       Neg Pred Value            Precision               Recall 
##            0.3761770            0.7968341            0.8213323 
##                   F1           Prevalence       Detection Rate 
##            0.8088977            0.7592138            0.6235667 
## Detection Prevalence    Balanced Accuracy 
##            0.7825553            0.5805216
# Keep Precision, Recall and F1 (elements 5 to 7 of byClass), picked by name.
ad_tda_kde_5.40.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers

### 3-fold diff

# Element-wise difference of the per-fold accuracies: ad_nn1_fit_re minus the
# TDA-assisted fit, so negative values mean the TDA-assisted fit scored higher.
# NOTE(review): rows are paired by position, not by fold label, and the
# resample table printed for the TDA-assisted fit is ordered Fold3, Fold2,
# Fold1 -- confirm that ad_nn1_fit_re uses the same row order.
diff_tda_kde_5.40.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n4_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n4_3_fold
##      Accuracy
## 1 -0.01514677
## 2 -0.03098149
## 3 -0.03469305
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The result holds probLeft / probRope / probRight. The trailing -0.01 and
# 0.01 are presumably the ROPE limits -- confirm against the definition of
# BayesianSignTest.
bst_tda_kde_5.40.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight; evaluates to Inf when probRight is 0.
bst_tda_kde_5.40.5_nn1.n4_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n4_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same inputs as the sign test; the result holds winLeft / winRope / winRight.
bsr_tda_kde_5.40.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9604
## 
## $winRope
## [1] 0.0396
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# The result holds left / rope / right. The second argument (0.1) is
# presumably the assumed correlation between folds -- confirm against the
# definition of correlatedBayesianTtest.
bct_tda_kde_5.40.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n4_3_fold
## $left
## [1] 0.9329398
## 
## $rope
## [1] 0.05038429
## 
## $right
## [1] 0.01667588
# Rope Plot
# Plot the fold-wise differences against the [-0.01, 0.01] range.
plot(
  rope(diff_tda_kde_5.40.5_nn1_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold))
#bf_tda_kde_5.40.5_nn1.n4_3_fold

#t_test
# One-sample t-test of the fold-wise differences against a true mean of 0.
t.test(as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold)
## t = -4.495, df = 2, p-value = 0.0461
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.052727876 -0.001153002
## sample estimates:
##   mean of x 
## -0.02694044
### Test set diff
# Difference in test-set accuracy: nn1_cf_ov_acc minus the TDA-assisted value.
# This is a single number, not one value per fold.
diff_tda_kde_5.40.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n4_test
##  Accuracy 
## 0.0538493
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the single test-set difference, with the same -0.01 / 0.01
# bounds used for the 3-fold comparison.
bst_tda_kde_5.40.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight for the test-set sign test.
bst_tda_kde_5.40.5_nn1.n4_test_odds.left <-
  bst_tda_kde_5.40.5_nn1.n4_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n4_test[["probRight"]]
bst_tda_kde_5.40.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set difference; the result holds
# winLeft / winRope / winRight.
bsr_tda_kde_5.40.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1609
## 
## $winRight
## [1] 0.8391
# Bayesian Correlated Test
# NOTE(review): the input here is a single difference, and the printed result
# is NA for left, rope and right -- the test does not produce usable
# probabilities from one observation.
bct_tda_kde_5.40.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n4_test))

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n4_test))
#bf_tda_kde_5.40.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n4_test))

##Node5

#Neural Network 1

# Fit a neural network (caret method "nnet") on tda.m_kde_adult_5.40.5.n5.vec,
# with as.factor(adult_df1) as the outcome and every other column as a
# predictor. Candidate size/decay values come from nn1Grid, resampling is
# configured by fitControl, and the best candidate is chosen by Accuracy.
# NOTE(review): `Importance` is not a train() argument, so it is forwarded to
# the underlying fitter through `...`; presumably nnet ignores it -- confirm
# whether it is needed here.
Adult_TDA_KDE_5.40.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n5.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 3269.045504 
## iter  10 value 1865.868854
## iter  20 value 1748.506493
## iter  30 value 1748.227799
## iter  40 value 1748.223102
## iter  50 value 1745.550855
## iter  60 value 1744.884688
## iter  70 value 1744.877172
## iter  70 value 1744.877155
## iter  70 value 1744.877154
## final  value 1744.877154 
## converged
## # weights:  331
## initial  value 2243.759558 
## iter  10 value 1868.290781
## iter  20 value 1833.736695
## iter  30 value 1739.455336
## iter  40 value 1686.653440
## iter  50 value 1668.844883
## iter  60 value 1490.208414
## iter  70 value 1364.988054
## iter  80 value 1332.308674
## iter  90 value 1240.738551
## iter 100 value 1186.525957
## final  value 1186.525957 
## stopped after 100 iterations
## # weights:  551
## initial  value 3348.268517 
## iter  10 value 1797.378976
## iter  20 value 1726.991062
## iter  30 value 1717.968929
## iter  40 value 1695.253407
## iter  50 value 1634.358971
## iter  60 value 1482.890917
## iter  70 value 1331.279436
## iter  80 value 1249.906239
## iter  90 value 1192.493358
## iter 100 value 1187.776299
## final  value 1187.776299 
## stopped after 100 iterations
## # weights:  771
## initial  value 4130.199564 
## iter  10 value 1812.761157
## iter  20 value 1732.362258
## iter  30 value 1705.502334
## iter  40 value 1699.403379
## iter  50 value 1693.377021
## iter  60 value 1687.058395
## iter  70 value 1643.328630
## iter  80 value 1439.240129
## iter  90 value 1375.981892
## iter 100 value 1300.754436
## final  value 1300.754436 
## stopped after 100 iterations
## # weights:  221
## initial  value 2598.619157 
## iter  10 value 1795.530354
## iter  20 value 1746.005348
## iter  30 value 1741.461177
## iter  40 value 1722.471716
## iter  50 value 1718.710021
## final  value 1718.631853 
## converged
## # weights:  331
## initial  value 2250.964052 
## iter  10 value 1875.205628
## iter  20 value 1873.758303
## iter  30 value 1873.741285
## iter  40 value 1871.850098
## iter  50 value 1865.187778
## iter  60 value 1864.382789
## iter  70 value 1862.854722
## iter  80 value 1862.835817
## final  value 1862.835589 
## converged
## # weights:  551
## initial  value 2743.992338 
## iter  10 value 1874.969524
## iter  20 value 1873.632243
## iter  30 value 1860.157541
## iter  40 value 1771.970618
## iter  50 value 1434.061586
## iter  60 value 1290.072731
## iter  70 value 1229.095583
## iter  80 value 1211.446447
## iter  90 value 1206.484264
## iter 100 value 1197.587344
## final  value 1197.587344 
## stopped after 100 iterations
## # weights:  771
## initial  value 2028.016159 
## iter  10 value 1770.626290
## iter  20 value 1747.154452
## iter  30 value 1740.694723
## iter  40 value 1731.111406
## iter  50 value 1727.480722
## final  value 1727.270400 
## converged
## # weights:  221
## initial  value 3259.637195 
## iter  10 value 1875.741401
## iter  20 value 1875.142629
## iter  30 value 1857.543955
## iter  40 value 1819.913086
## iter  50 value 1737.424532
## iter  60 value 1623.925347
## iter  70 value 1496.856522
## iter  80 value 1295.842677
## iter  90 value 1238.483664
## iter 100 value 1216.790766
## final  value 1216.790766 
## stopped after 100 iterations
## # weights:  331
## initial  value 3739.135424 
## iter  10 value 1871.546885
## iter  20 value 1859.054030
## iter  30 value 1838.916942
## iter  40 value 1802.714705
## iter  50 value 1744.749700
## iter  60 value 1734.047589
## iter  70 value 1721.057050
## iter  80 value 1596.732165
## iter  90 value 1371.311685
## iter 100 value 1306.991847
## final  value 1306.991847 
## stopped after 100 iterations
## # weights:  551
## initial  value 4298.113328 
## iter  10 value 1871.806159
## iter  20 value 1779.657482
## iter  30 value 1758.814099
## iter  40 value 1748.309992
## iter  50 value 1733.212809
## iter  60 value 1731.177436
## iter  70 value 1659.047112
## iter  80 value 1286.084915
## iter  90 value 1245.231413
## iter 100 value 1238.392653
## final  value 1238.392653 
## stopped after 100 iterations
## # weights:  771
## initial  value 5890.622781 
## iter  10 value 1822.212125
## iter  20 value 1759.800615
## iter  30 value 1720.102319
## iter  40 value 1706.930726
## iter  50 value 1701.764034
## iter  60 value 1688.232465
## iter  70 value 1679.466458
## iter  80 value 1675.981088
## iter  90 value 1645.397074
## iter 100 value 1529.252298
## final  value 1529.252298 
## stopped after 100 iterations
## # weights:  221
## initial  value 3055.015087 
## iter  10 value 1838.017661
## iter  20 value 1794.081438
## iter  30 value 1736.542040
## iter  40 value 1455.357941
## iter  50 value 1400.902523
## iter  60 value 1334.404068
## iter  70 value 1287.109335
## iter  80 value 1278.687079
## iter  90 value 1276.717492
## iter 100 value 1276.591905
## final  value 1276.591905 
## stopped after 100 iterations
## # weights:  331
## initial  value 2385.793124 
## iter  10 value 1875.582107
## iter  10 value 1875.582094
## iter  10 value 1875.582086
## final  value 1875.582086 
## converged
## # weights:  551
## initial  value 5850.110221 
## iter  10 value 1874.678440
## iter  20 value 1814.428413
## iter  30 value 1745.708574
## iter  40 value 1745.130999
## final  value 1745.114781 
## converged
## # weights:  771
## initial  value 2123.127137 
## iter  10 value 1875.426686
## iter  20 value 1801.453127
## iter  30 value 1796.830270
## iter  40 value 1737.405281
## iter  50 value 1729.498712
## iter  60 value 1729.364429
## iter  70 value 1729.297323
## iter  80 value 1729.072717
## iter  90 value 1724.829705
## iter 100 value 1713.242477
## final  value 1713.242477 
## stopped after 100 iterations
## # weights:  221
## initial  value 3737.998938 
## iter  10 value 1876.701488
## iter  20 value 1875.644322
## iter  30 value 1870.991973
## iter  40 value 1799.546565
## iter  50 value 1640.271250
## iter  60 value 1370.969229
## iter  70 value 1311.082136
## iter  80 value 1257.960226
## iter  90 value 1227.701159
## iter 100 value 1217.382133
## final  value 1217.382133 
## stopped after 100 iterations
## # weights:  331
## initial  value 4141.783964 
## iter  10 value 1874.782654
## iter  20 value 1859.040235
## iter  30 value 1748.600803
## iter  40 value 1744.459306
## iter  50 value 1730.513907
## iter  60 value 1727.659630
## iter  70 value 1714.917214
## iter  80 value 1673.484060
## iter  90 value 1564.969678
## iter 100 value 1394.404850
## final  value 1394.404850 
## stopped after 100 iterations
## # weights:  551
## initial  value 2923.984518 
## iter  10 value 1855.392496
## iter  20 value 1759.523317
## iter  30 value 1747.633082
## iter  40 value 1735.128590
## iter  50 value 1688.037139
## iter  60 value 1665.664400
## iter  70 value 1640.805135
## iter  80 value 1607.676959
## iter  90 value 1467.203459
## iter 100 value 1379.074165
## final  value 1379.074165 
## stopped after 100 iterations
## # weights:  771
## initial  value 2350.657732 
## iter  10 value 1862.235244
## iter  20 value 1769.687148
## iter  30 value 1710.214712
## iter  40 value 1694.785660
## iter  50 value 1691.241994
## iter  60 value 1688.887357
## iter  70 value 1683.198812
## iter  80 value 1655.411847
## iter  90 value 1625.904567
## iter 100 value 1505.161656
## final  value 1505.161656 
## stopped after 100 iterations
## # weights:  221
## initial  value 2920.528101 
## iter  10 value 1876.177909
## final  value 1876.177709 
## converged
## # weights:  331
## initial  value 4298.408146 
## iter  10 value 1793.313824
## iter  20 value 1753.887937
## iter  30 value 1678.816965
## iter  40 value 1656.832797
## iter  50 value 1488.563439
## iter  60 value 1284.203653
## iter  70 value 1214.925360
## iter  80 value 1205.808706
## iter  90 value 1203.883828
## iter 100 value 1203.505261
## final  value 1203.505261 
## stopped after 100 iterations
## # weights:  551
## initial  value 2615.648353 
## iter  10 value 1876.154301
## iter  20 value 1875.559206
## iter  30 value 1875.552346
## iter  30 value 1875.552331
## iter  40 value 1866.940085
## iter  50 value 1752.021819
## iter  60 value 1731.067338
## iter  70 value 1594.616011
## iter  80 value 1406.827814
## iter  90 value 1304.355190
## iter 100 value 1261.271831
## final  value 1261.271831 
## stopped after 100 iterations
## # weights:  771
## initial  value 2526.707922 
## iter  10 value 1870.282796
## iter  20 value 1809.053463
## iter  30 value 1794.819332
## iter  40 value 1744.726459
## iter  50 value 1744.521012
## iter  60 value 1724.522629
## iter  70 value 1717.271247
## iter  80 value 1592.412646
## iter  90 value 1377.466252
## iter 100 value 1302.095088
## final  value 1302.095088 
## stopped after 100 iterations
## # weights:  221
## initial  value 3411.987446 
## iter  10 value 1875.401684
## iter  20 value 1787.284330
## iter  30 value 1737.280477
## iter  40 value 1732.440813
## iter  50 value 1719.679955
## iter  60 value 1645.971469
## iter  70 value 1419.729166
## iter  80 value 1286.962688
## iter  90 value 1280.364716
## iter 100 value 1274.862541
## final  value 1274.862541 
## stopped after 100 iterations
## # weights:  331
## initial  value 5473.138785 
## iter  10 value 1857.282443
## iter  20 value 1799.385210
## iter  30 value 1756.528963
## iter  40 value 1720.172435
## iter  50 value 1713.730880
## iter  60 value 1712.375751
## iter  70 value 1712.191889
## final  value 1712.189910 
## converged
## # weights:  551
## initial  value 3045.207585 
## iter  10 value 1843.118338
## iter  20 value 1782.831661
## iter  30 value 1720.522667
## iter  40 value 1703.279408
## iter  50 value 1691.043864
## iter  60 value 1653.303541
## iter  70 value 1524.235002
## iter  80 value 1348.065341
## iter  90 value 1306.962319
## iter 100 value 1260.980819
## final  value 1260.980819 
## stopped after 100 iterations
## # weights:  771
## initial  value 3134.990177 
## iter  10 value 1853.889383
## iter  20 value 1753.318649
## iter  30 value 1724.731230
## iter  40 value 1705.508087
## iter  50 value 1700.341314
## iter  60 value 1692.521053
## iter  70 value 1635.141845
## iter  80 value 1599.625093
## iter  90 value 1415.451545
## iter 100 value 1275.917941
## final  value 1275.917941 
## stopped after 100 iterations
## # weights:  221
## initial  value 3007.381605 
## iter  10 value 1865.729007
## iter  20 value 1769.460110
## iter  30 value 1751.740782
## iter  40 value 1749.977614
## iter  50 value 1749.917360
## iter  60 value 1749.574845
## iter  70 value 1739.608465
## iter  80 value 1730.283370
## iter  90 value 1713.112416
## iter 100 value 1633.364955
## final  value 1633.364955 
## stopped after 100 iterations
## # weights:  331
## initial  value 4351.848140 
## iter  10 value 1761.398296
## iter  20 value 1754.560599
## iter  30 value 1750.487661
## iter  40 value 1742.524128
## iter  50 value 1733.791062
## iter  60 value 1721.614087
## iter  70 value 1646.564206
## iter  80 value 1455.926248
## iter  90 value 1396.531999
## iter 100 value 1332.249301
## final  value 1332.249301 
## stopped after 100 iterations
## # weights:  551
## initial  value 4170.975004 
## iter  10 value 1875.507656
## iter  20 value 1851.336027
## iter  30 value 1798.841145
## iter  40 value 1716.410916
## iter  50 value 1655.619548
## iter  60 value 1624.863282
## iter  70 value 1566.559888
## iter  80 value 1475.547331
## iter  90 value 1445.598821
## iter 100 value 1388.984044
## final  value 1388.984044 
## stopped after 100 iterations
## # weights:  771
## initial  value 3851.882032 
## iter  10 value 1871.630239
## iter  20 value 1772.987321
## iter  30 value 1748.907661
## iter  40 value 1742.590802
## iter  50 value 1736.162007
## iter  60 value 1732.282288
## iter  70 value 1725.288089
## iter  80 value 1682.015459
## iter  90 value 1515.666506
## iter 100 value 1338.776015
## final  value 1338.776015 
## stopped after 100 iterations
## # weights:  221
## initial  value 3862.960169 
## iter  10 value 1877.802425
## iter  20 value 1874.089144
## iter  30 value 1785.170159
## iter  40 value 1772.003422
## iter  50 value 1753.787477
## iter  60 value 1733.985934
## iter  70 value 1707.743410
## iter  80 value 1659.426320
## iter  90 value 1472.693222
## iter 100 value 1411.873137
## final  value 1411.873137 
## stopped after 100 iterations
## # weights:  331
## initial  value 4581.276154 
## iter  10 value 1877.368451
## iter  20 value 1875.848392
## iter  30 value 1875.830559
## iter  40 value 1833.636591
## iter  50 value 1748.337728
## iter  60 value 1725.798760
## iter  70 value 1426.819577
## iter  80 value 1354.557104
## iter  90 value 1268.428450
## iter 100 value 1248.316145
## final  value 1248.316145 
## stopped after 100 iterations
## # weights:  551
## initial  value 4679.845220 
## iter  10 value 1870.861503
## iter  20 value 1836.597349
## iter  30 value 1644.342105
## iter  40 value 1460.704466
## iter  50 value 1397.504594
## iter  60 value 1281.445238
## iter  70 value 1256.235906
## iter  80 value 1249.814295
## iter  90 value 1238.793705
## iter 100 value 1235.245190
## final  value 1235.245190 
## stopped after 100 iterations
## # weights:  771
## initial  value 3320.197159 
## iter  10 value 1773.130778
## iter  20 value 1763.488968
## iter  30 value 1754.585907
## iter  40 value 1749.359950
## iter  50 value 1733.114290
## iter  60 value 1725.496987
## iter  70 value 1721.728428
## iter  80 value 1718.923890
## iter  90 value 1713.868960
## iter 100 value 1642.003196
## final  value 1642.003196 
## stopped after 100 iterations
## # weights:  771
## initial  value 11811.625253 
## iter  10 value 2765.188399
## iter  20 value 2708.237117
## iter  30 value 2329.291391
## iter  40 value 2054.155714
## iter  50 value 1901.765800
## iter  60 value 1833.600825
## iter  70 value 1803.001414
## iter  80 value 1777.190843
## iter  90 value 1765.780808
## iter 100 value 1752.217035
## final  value 1752.217035 
## stopped after 100 iterations
# Print the caret fit: resampled Accuracy/Kappa for every size/decay pair
# and the tuning values that were finally selected.
Adult_TDA_KDE_5.40.5_n5_NN1Fit0
## Neural Network 
## 
## 6628 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4418, 4419, 4419 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8645142  0.2460836
##   2     0.5    0.8654194  0.2335553
##   2     0.7    0.8589316  0.1869908
##   3     0.3    0.8589318  0.1813695
##   3     0.5    0.8614983  0.1496912
##   3     0.7    0.8655730  0.3204874
##   5     0.3    0.8657225  0.3164292
##   5     0.5    0.8634583  0.2572368
##   5     0.7    0.8687402  0.3886870
##   7     0.3    0.8685881  0.2382796
##   7     0.5    0.8702480  0.3065192
##   7     0.7    0.8679849  0.2330469
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.5.
# Show the per-fold Accuracy/Kappa table stored on the fit.
Adult_TDA_KDE_5.40.5_n5_NN1Fit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8605704 0.2623094    Fold2
## 2 0.8665158 0.2107468    Fold1
## 3 0.8836578 0.4465014    Fold3
# Keep only the Accuracy column (still a one-column data frame).
ad_tda_kde_5.40.5_n5_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_NN1Fit0[["resample"]]["Accuracy"]

# Print the architecture and weights of the final network.
summary(Adult_TDA_KDE_5.40.5_n5_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.08    -0.37     0.09    -0.15     0.73     0.00    -0.57     0.26 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.14    -0.16     0.01     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.13     0.00     0.00    -0.18 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.12     1.08     0.01    -0.19     0.30 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.01     0.28    -0.30    -0.01     0.10    -0.45     0.01     0.29 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -0.01    -0.45    -0.13     0.27    -0.30     0.07     0.58     0.03 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.20     0.04    -0.17     0.30     0.01    -0.09    -0.36     0.15 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.07     0.10    -0.55     0.16     0.01     0.35    -0.04     0.12 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.01     0.05     0.00     0.01    -0.02     0.02 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.02    -0.12     0.02     0.04     0.00     0.19     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.02     0.00     0.00     0.00     0.01     0.01     0.01     0.01 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##    -0.09     0.03    -0.11     0.01     0.04    -0.07     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00    -0.17     0.03     0.00     0.03     0.00    -0.07     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.23    -0.06     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.24    -0.24     0.03    -0.30    -0.17     0.04     1.40    -0.20 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.32    -0.26     0.01     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00    -0.42     0.00     0.00     1.31 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00    -0.64     0.66     0.53    -0.74    -0.79 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.14     0.19     0.86     0.06     0.08     0.53     0.01    -0.11 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     1.11     0.33     0.02    -0.21    -1.16     0.21    -0.85    -0.11 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.57     0.53    -0.70    -0.01    -0.26     0.34    -0.38     1.53 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.97     0.45    -0.56     0.97     0.16    -0.79     0.63    -0.39 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.01    -0.05    -0.21     0.01     0.11    -0.33     0.11 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.02     0.09    -0.05     0.03     0.12     0.01     0.12     0.01 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.07     0.02     0.00     0.02     0.05     0.01     0.01    -0.08 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.13     0.01    -0.12     0.03     0.10    -0.40     0.02     0.05 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.03     0.09    -0.24     0.04     0.06     0.02     0.04     0.02 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##    -0.02     0.00     0.36    -0.07    -0.03 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##    -0.03    -0.44     0.04    -0.04     0.06     0.00     0.15     0.03 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##    -0.29     0.03     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.35     0.00     0.00    -0.47 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.09     0.52    -0.30     0.14     0.65 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##    -0.02    -0.39    -0.10    -0.02     0.04    -0.28     0.00     0.33 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -0.20     0.09    -0.06     0.06     0.08    -0.01    -0.13     0.15 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.13    -0.15    -0.06     0.72    -0.24     0.02    -0.34    -0.22 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.03     0.01    -0.17    -0.22    -0.04     0.40    -0.19     0.16 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.28     0.02    -0.01     0.00    -0.06    -0.02 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00    -0.04     0.04    -0.04    -0.05     0.00     0.04    -0.01 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##    -0.01     0.00     0.00     0.00    -0.01     0.00     0.00     0.04 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##    -0.01     0.01     0.06    -0.01    -0.02    -0.06    -0.02    -0.01 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00    -0.04     0.06     0.01    -0.05     0.00     0.01    -0.03 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.02    -0.02     0.14     0.06     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.06     0.53     0.04     0.05     0.20     0.00    -0.27     0.06 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.01    -0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.14     0.00     0.00    -0.05 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00    -0.03     0.80    -0.09     0.00     0.27 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.05    -0.09    -0.05    -0.02     0.05     0.08     0.00    -0.08 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.02     0.02     0.05     0.10    -0.08     0.00    -0.07     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##    -0.07     0.02     0.01     0.22    -0.15     0.00    -0.01    -0.05 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.04     0.04     0.01     0.01     0.00    -0.01     0.00     0.06 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00    -0.01    -0.41    -0.03     0.01    -0.03    -0.01    -0.01 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.01     0.01     0.00    -0.01     0.00    -0.08     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##    -0.01     0.00     0.00     0.00     0.01     0.00     0.01     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.01     0.01    -0.05    -0.01     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00    -0.03     0.00     0.00     0.00     0.02 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.25    -0.01     0.01 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.17    -0.04     0.23    -0.79     0.00     0.11    -0.11    -0.24 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.52     0.32     0.14     0.00     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00    -0.72     0.00     0.00     0.71 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.18     0.31     0.36    -1.14    -0.91 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.46     0.82     0.50     0.07     0.34    -0.09     0.14     0.07 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##    -0.63     0.62     0.52     0.16     0.67     0.39    -0.87    -0.48 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##    -0.26    -0.81     0.41    -0.21    -0.04     0.38     0.79     0.16 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##    -0.91     0.50    -0.39    -0.30     0.90    -0.53     0.11     0.06 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00    -0.02    -0.15     0.33    -0.13    -0.50     0.54 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##    -0.54     0.69    -0.30     0.83     0.11     0.02    -0.55     0.26 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##    -0.53     0.05     0.00     0.07     0.49     0.09     0.28    -0.34 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##    -0.33     0.38    -0.92     0.68     0.35     0.06     0.31     0.08 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.40    -0.93     0.04    -0.21     0.51     0.45    -0.07     0.19 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##    -0.19     0.34    -0.45    -0.28    -0.95 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##    -0.02    -0.06     0.35    -0.90    -0.02     0.00    -0.45     0.25 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     1.08    -0.33     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00    -0.22     0.00     0.00    -0.21 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.41    -0.21    -0.54    -0.30    -0.13 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##    -0.25     0.36     0.78     0.08     0.35    -0.97     0.02     0.45 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##    -0.03     0.31     0.23    -0.07    -0.19    -0.07     0.00    -0.38 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##    -0.01    -0.30     0.64    -0.47    -0.15     0.80     0.39     0.29 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##    -0.87    -0.17     0.00     0.57     0.86    -1.28    -0.62     0.60 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00    -0.02    -0.31     0.00    -0.20    -0.09    -0.01 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.31     0.02     0.93    -0.09    -0.41     0.00     0.00    -0.04 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##    -0.01     0.00     0.00     0.00     0.00     0.00    -0.01    -0.01 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##    -0.08     0.04     0.00     0.01    -0.03     0.16     0.01    -0.01 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.01     0.10    -0.11     0.00     0.02     0.01    -0.02    -0.03 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00    -0.20     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
##  1.10 -1.53 -2.45  0.30 -0.17 -5.28  4.56 -0.21
# Variable-importance plot for the TDA-assisted NN fit, limited to the 50
# highest-ranked predictors. The title previously read "TDA-Assited".
vip(Adult_TDA_KDE_5.40.5_n5_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n5_NN1Fit TDA-Assisted NN")

# Predict the outcome for the test data with Adult_TDA_KDE_5.40.5_n5_NN1Fit0.
pred0 <- predict(Adult_TDA_KDE_5.40.5_n5_NN1Fit0,
                 newdata = adult.one_hot_df4Test)

# Confusion matrix of the predictions against the observed test labels.
# It is printed once; the second, identical print that used to follow was
# redundant and has been dropped.
ad_tda_kde_5.40.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6678  1518
##      >50K     738   834
##                                           
##                Accuracy : 0.769           
##                  95% CI : (0.7606, 0.7774)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.01165         
##                                           
##                   Kappa : 0.2876          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.9005          
##             Specificity : 0.3546          
##          Pos Pred Value : 0.8148          
##          Neg Pred Value : 0.5305          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6837          
##    Detection Prevalence : 0.8391          
##       Balanced Accuracy : 0.6275          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n5_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.690418e-01   2.876467e-01   7.605534e-01   7.773695e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.164745e-02   1.883989e-60
# Select metrics by name rather than by position so the extraction cannot
# silently pick the wrong statistic if the element order ever differs.
ad_tda_kde_5.40.5_n5_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_nn1_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9004854            0.3545918            0.8147877 
##       Neg Pred Value            Precision               Recall 
##            0.5305344            0.8147877            0.9004854 
##                   F1           Prevalence       Detection Rate 
##            0.8554958            0.7592138            0.6836609 
## Detection Prevalence    Balanced Accuracy 
##            0.8390663            0.6275386
ad_tda_kde_5.40.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted NN vs. TDA-assisted NN classifier

### 3-fold diff

# Element-wise difference of the two resampled-accuracy tables
# (ad_nn1_fit_re minus the TDA-assisted n5 fit), one row per fold.
diff_tda_kde_5.40.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n5_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n5_3_fold
##      Accuracy
## 1 -0.01062963
## 2 -0.07354400
## 3 -0.06660064
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences; -0.01 and 0.01 are passed as
# the second and third arguments (presumably the ROPE limits, matching the
# rope() call further down).
bst_tda_kde_5.40.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold), -0.01, 0.01
)
bst_tda_kde_5.40.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test. probRight is 0 in the
# recorded run, hence the Inf below.
bst_tda_kde_5.40.5_nn1.n5_3_fold_odds.left <-
  with(bst_tda_kde_5.40.5_nn1.n5_3_fold, probLeft / probRight)
bst_tda_kde_5.40.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed rank test on the same differences and limits.
bsr_tda_kde_5.40.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.40.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9636667
## 
## $winRope
## [1] 0.03633333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test; the extra second argument 0.1 is presumably
# the assumed correlation between folds -- TODO confirm against the
# definition of correlatedBayesianTtest.
bct_tda_kde_5.40.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_nn1.n5_3_fold
## $left
## [1] 0.8889447
## 
## $rope
## [1] 0.05106214
## 
## $right
## [1] 0.05999317
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(rope(diff_tda_kde_5.40.5_nn1_n5_3_fold, c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold))
#bf_tda_kde_5.40.5_nn1.n5_3_fold

# Frequentist one-sample t-test on the same fold differences.
t.test(as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold)
## t = -2.5236, df = 2, p-value = 0.1276
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.13594694  0.03543076
## sample estimates:
##   mean of x 
## -0.05025809
### Test set diff
# Test-set accuracy difference: nn1_cf_ov_acc minus the accuracy taken from
# the TDA-assisted n5 confusion matrix. This is a single value.
diff_tda_kde_5.40.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n5_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n5_test
##    Accuracy 
## -0.00982801
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, called with the same
# -0.01 / 0.01 limits as the 3-fold comparison.
bst_tda_kde_5.40.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n5_test), -0.01, 0.01
)
bst_tda_kde_5.40.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" against "right". Both probabilities are 0 in the recorded
# run, so the ratio is 0 / 0 and prints NaN.
bst_tda_kde_5.40.5_nn1.n5_test_odds.left <-
  with(bst_tda_kde_5.40.5_nn1.n5_test, probLeft / probRight)
bst_tda_kde_5.40.5_nn1.n5_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

# Run the signed rank test on the n5 test-set difference and store it under
# the n5 name. The call used to read diff_tda_kde_5.40.5_nn1.n4_test and
# write bsr_tda_kde_5.40.5_nn1.n4_test (copy-paste slip from the n4 section),
# so the n5 comparison was never actually computed here.
bsr_tda_kde_5.40.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n5_test), -0.01, 0.01
)
bsr_tda_kde_5.40.5_nn1.n5_test
# NOTE: the output recorded below was produced from the n4 difference and is
# stale for this call; re-run the script to regenerate it for n5.
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1578667
## 
## $winRight
## [1] 0.8421333
# Bayesian correlated t-test on the single test-set difference. Every
# component comes back NA in the recorded run below.
bct_tda_kde_5.40.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n5_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_kde_5.40.5_nn1.n5_test))

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n5_test)) #bf_tda_pca_5.40.5_nn1.n5_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n5_test)) 


## Logistic regression (non-TDA-assisted)

# Fit a binomial glm through caret::train on the one-hot encoded training
# data, resampled according to fitControl and scored on accuracy.
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
adultLrFit
## Generalized Linear Model 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15195, 15196 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8510946  0.5655136
adultLrFit$resample
##    Accuracy     Kappa Resample
## 1 0.8469334 0.5572254    Fold1
## 2 0.8558831 0.5769366    Fold2
## 3 0.8504673 0.5623789    Fold3
# Keep only the per-fold accuracy column (still a one-column data frame).
ad_lr_fit_re <- adultLrFit$resample["Accuracy"]

# Coefficient table of the fitted model.
summary(adultLrFit)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.023e+13  6.477e+12   -1.580 0.114062    
## V1                              2.877e-02  1.982e-03   14.517  < 2e-16 ***
## V2..                            1.023e+13  6.477e+12    1.580 0.114062    
## V2.Federal.gov                  1.023e+13  6.477e+12    1.580 0.114062    
## V2.Local.gov                    1.023e+13  6.477e+12    1.580 0.114062    
## V2.Never.worked                -4.493e+15  6.477e+12 -693.776  < 2e-16 ***
## V2.Private                      1.023e+13  6.477e+12    1.580 0.114062    
## V2.Self.emp.inc                 1.023e+13  6.477e+12    1.580 0.114062    
## V2.Self.emp.not.inc             1.023e+13  6.477e+12    1.580 0.114062    
## V2.State.gov                    1.023e+13  6.477e+12    1.580 0.114062    
## V2.Without.pay                  1.023e+13  6.477e+12    1.580 0.114062    
## V3                              6.829e-07  2.062e-07    3.311 0.000929 ***
## V4.10th                        -1.171e+00  1.825e-01   -6.415 1.41e-10 ***
## V4.11th                        -1.000e+00  1.819e-01   -5.498 3.83e-08 ***
## V4.12th                        -7.853e-01  2.752e-01   -2.853 0.004333 ** 
## V4.1st.4th                     -1.871e+00  6.099e-01   -3.067 0.002162 ** 
## V4.5th.6th                     -1.244e+00  3.324e-01   -3.743 0.000182 ***
## V4.7th.8th                     -1.593e+00  2.140e-01   -7.447 9.58e-14 ***
## V4.9th                         -1.634e+00  2.940e-01   -5.558 2.73e-08 ***
## V4.Assoc.acdm                   2.427e-01  1.188e-01    2.043 0.041024 *  
## V4.Assoc.voc                    2.679e-01  1.016e-01    2.637 0.008373 ** 
## V4.Bachelors                    7.912e-01  6.671e-02   11.861  < 2e-16 ***
## V4.Doctorate                    2.068e+00  1.960e-01   10.554  < 2e-16 ***
## V4.HS.grad                     -3.188e-01  6.025e-02   -5.291 1.21e-07 ***
## V4.Masters                      1.203e+00  9.717e-02   12.381  < 2e-16 ***
## V4.Preschool                   -2.478e+01  4.565e+04   -0.001 0.999567    
## V4.Prof.school                  1.768e+00  1.640e-01   10.778  < 2e-16 ***
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -2.067e-01  1.836e-01   -1.126 0.260179    
## V6.Married.AF.spouse            2.268e+00  6.704e-01    3.383 0.000716 ***
## V6.Married.civ.spouse           2.016e+00  3.637e-01    5.543 2.97e-08 ***
## V6.Married.spouse.absent       -2.659e-01  3.270e-01   -0.813 0.416064    
## V6.Never.married               -5.888e-01  1.902e-01   -3.096 0.001963 ** 
## V6.Separated                   -9.858e-02  2.441e-01   -0.404 0.686323    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.023e-01  1.192e-01    1.697 0.089726 .  
## V7.Armed.Forces                -7.939e-01  1.624e+00   -0.489 0.624962    
## V7.Craft.repair                 2.979e-01  1.019e-01    2.923 0.003466 ** 
## V7.Exec.managerial              1.007e+00  1.044e-01    9.650  < 2e-16 ***
## V7.Farming.fishing             -8.628e-01  1.757e-01   -4.910 9.12e-07 ***
## V7.Handlers.cleaners           -5.935e-01  1.771e-01   -3.351 0.000806 ***
## V7.Machine.op.inspct           -2.407e-02  1.274e-01   -0.189 0.850211    
## V7.Other.service               -6.090e-01  1.490e-01   -4.087 4.38e-05 ***
## V7.Priv.house.serv             -3.405e+00  1.946e+00   -1.750 0.080130 .  
## V7.Prof.specialty               6.747e-01  1.122e-01    6.012 1.83e-09 ***
## V7.Protective.serv              7.220e-01  1.550e-01    4.658 3.20e-06 ***
## V7.Sales                        4.921e-01  1.077e-01    4.570 4.88e-06 ***
## V7.Tech.support                 9.149e-01  1.416e-01    6.461 1.04e-10 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.421e+00  1.220e-01  -11.642  < 2e-16 ***
## V8.Not.in.family               -9.011e-01  3.368e-01   -2.675 0.007467 ** 
## V8.Other.relative              -1.859e+00  3.008e-01   -6.181 6.38e-10 ***
## V8.Own.child                   -2.126e+00  3.330e-01   -6.383 1.74e-10 ***
## V8.Unmarried                   -1.056e+00  3.482e-01   -3.033 0.002418 ** 
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -7.414e-01  2.697e-01   -2.749 0.005977 ** 
## V9.Asian.Pac.Islander           4.763e-02  1.879e-01    0.253 0.799923    
## V9.Black                       -1.683e-01  9.175e-02   -1.835 0.066569 .  
## V9.Other                       -3.401e-01  3.230e-01   -1.053 0.292291    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.625e-01  9.424e-02   -9.152  < 2e-16 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.164e-04  1.239e-05   25.527  < 2e-16 ***
## V12                             6.201e-04  4.394e-05   14.113  < 2e-16 ***
## V13                             2.963e-02  1.935e-03   15.312  < 2e-16 ***
## V14..                          -4.819e-01  7.233e-01   -0.666 0.505218    
## V14.Cambodia                    1.147e+00  1.099e+00    1.043 0.296775    
## V14.Canada                     -1.529e-01  7.754e-01   -0.197 0.843649    
## V14.China                      -8.039e-01  8.419e-01   -0.955 0.339658    
## V14.Columbia                   -1.342e+00  1.150e+00   -1.168 0.242916    
## V14.Cuba                        2.932e-01  7.928e-01    0.370 0.711537    
## V14.Dominican.Republic         -1.810e+00  1.268e+00   -1.427 0.153437    
## V14.Ecuador                    -2.579e-01  1.069e+00   -0.241 0.809409    
## V14.El.Salvador                -8.031e-01  9.135e-01   -0.879 0.379330    
## V14.England                     1.845e-01  7.991e-01    0.231 0.817367    
## V14.France                      2.495e-01  1.007e+00    0.248 0.804303    
## V14.Germany                     1.113e-01  7.687e-01    0.145 0.884870    
## V14.Greece                     -1.073e+00  9.239e-01   -1.161 0.245646    
## V14.Guatemala                   2.472e-01  1.066e+00    0.232 0.816614    
## V14.Haiti                      -1.946e+00  1.419e+00   -1.371 0.170353    
## V14.Holand.Netherlands         -2.264e+01  3.459e+05    0.000 0.999948    
## V14.Honduras                   -1.543e+00  2.583e+00   -0.597 0.550296    
## V14.Hong                        3.604e-02  1.018e+00    0.035 0.971757    
## V14.Hungary                    -4.154e-01  1.208e+00   -0.344 0.731004    
## V14.India                      -7.107e-01  8.101e-01   -0.877 0.380320    
## V14.Iran                       -1.027e-01  8.592e-01   -0.120 0.904856    
## V14.Ireland                     4.688e-01  1.051e+00    0.446 0.655675    
## V14.Italy                       3.216e-01  8.264e-01    0.389 0.697120    
## V14.Jamaica                    -1.134e+00  9.678e-01   -1.172 0.241185    
## V14.Japan                       3.956e-01  8.559e-01    0.462 0.643909    
## V14.Laos                       -1.050e+00  1.320e+00   -0.795 0.426381    
## V14.Mexico                     -6.909e-01  7.463e-01   -0.926 0.354542    
## V14.Nicaragua                  -2.445e+01  5.937e+04    0.000 0.999671    
## V14.Outlying.US.Guam.USVI.etc. -2.389e+01  9.718e+04    0.000 0.999804    
## V14.Peru                       -1.208e+00  1.440e+00   -0.839 0.401520    
## V14.Philippines                 1.230e-02  7.716e-01    0.016 0.987281    
## V14.Poland                     -4.460e-01  8.546e-01   -0.522 0.601727    
## V14.Portugal                   -2.091e-01  1.041e+00   -0.201 0.840857    
## V14.Puerto.Rico                -1.305e+00  8.687e-01   -1.502 0.133081    
## V14.Scotland                   -1.326e-01  1.076e+00   -0.123 0.901896    
## V14.South                      -1.915e+00  9.039e-01   -2.118 0.034147 *  
## V14.Taiwan                     -4.376e-01  9.296e-01   -0.471 0.637811    
## V14.Thailand                   -3.764e-01  1.237e+00   -0.304 0.761007    
## V14.Trinadad.Tobago            -4.231e-01  1.156e+00   -0.366 0.714328    
## V14.United.States              -1.082e-01  7.053e-01   -0.153 0.878016    
## V14.Vietnam                    -1.432e+00  9.958e-01   -1.438 0.150307    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25165  on 22792  degrees of freedom
## Residual deviance: 14343  on 22693  degrees of freedom
## AIC: 14543
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the non-TDA logistic regression (top 25).
vip(adultLrFit, num_features = 25) +
  ggtitle('non-TDA-Assisted LR')

# Predict the outcome for the test data with the model trained above.
predictions <- predict(adultLrFit, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels.
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
lr_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6923   940
##      >50K     493  1412
##                                           
##                Accuracy : 0.8533          
##                  95% CI : (0.8461, 0.8603)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5709          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9335          
##             Specificity : 0.6003          
##          Pos Pred Value : 0.8805          
##          Neg Pred Value : 0.7412          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7087          
##    Detection Prevalence : 0.8050          
##       Balanced Accuracy : 0.7669          
##                                           
##        'Positive' Class :  <=50K          
## 
lr_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.532965e-01   5.709073e-01   8.461236e-01   8.602580e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  7.606349e-117   4.844546e-32
# Overall accuracy, kept as a named length-one vector.
lr_cf_ov_acc <- lr_cf$overall["Accuracy"]
lr_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9335221            0.6003401            0.8804528 
##       Neg Pred Value            Precision               Recall 
##            0.7412073            0.8804528            0.9335221 
##                   F1           Prevalence       Detection Rate 
##            0.9062111            0.7592138            0.7087428 
## Detection Prevalence    Balanced Accuracy 
##            0.8049754            0.7669311
# Precision, recall and F1 (elements 5 to 7 of byClass in the print above).
lr_cf_pre_rec_f1 <- lr_cf$byClass[c("Precision", "Recall", "F1")]


## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names use "5.40.5" -- confirm whether the overlap
## is 40% or 50%.
## Node1

# Fit the node-1 logistic regression through caret, using the resampling setup
# in `fitControl`. No separate glm() call is made beforehand: its result would
# be assigned to this same name and overwritten here before ever being used.
Adult_TDA_PC_5.40.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n1.vec,
  method = "glm",
  family = "binomial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit (resampling scheme plus averaged accuracy and kappa).
Adult_TDA_PC_5.40.5_n1_LrFit0
## Generalized Linear Model 
## 
## 3373 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 2248, 2249, 2249 
## Resampling results:
## 
##   Accuracy  Kappa       
##   0.967105  -0.007496659
# Per-fold metrics; only the Accuracy column (one-column data frame) is kept
# for the fold-wise comparison against the baseline model.
Adult_TDA_PC_5.40.5_n1_LrFit0[["resample"]]
##    Accuracy        Kappa Resample
## 1 0.9217778 -0.014572957    Fold1
## 2 0.9893238 -0.004019652    Fold2
## 3 0.9902135 -0.003897369    Fold3
ad_tda_pc_5.40.5_n1_lr_fit_re <- Adult_TDA_PC_5.40.5_n1_LrFit0[["resample"]]["Accuracy"]

# Coefficient table and deviance summary of the fitted model.
summary(Adult_TDA_PC_5.40.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (31 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                     7.794e+15  1.126e+08   69239921   <2e-16 ***
## V1                             -9.227e+12  1.202e+05  -76762987   <2e-16 ***
## V2..                            1.900e+14  1.601e+07   11869114   <2e-16 ***
## V2.Federal.gov                 -3.718e+14  7.285e+06  -51033622   <2e-16 ***
## V2.Local.gov                   -6.124e+13  6.477e+06   -9456257   <2e-16 ***
## V2.Never.worked                        NA         NA         NA       NA    
## V2.Private                     -3.302e+13  5.275e+06   -6260046   <2e-16 ***
## V2.Self.emp.inc                -1.830e+14  5.981e+06  -30594761   <2e-16 ***
## V2.Self.emp.not.inc            -3.051e+14  6.090e+06  -50107245   <2e-16 ***
## V2.State.gov                           NA         NA         NA       NA    
## V2.Without.pay                         NA         NA         NA       NA    
## V3                              7.779e+05  1.216e+01      63957   <2e-16 ***
## V4.10th                        -3.505e+15  4.050e+07  -86531925   <2e-16 ***
## V4.11th                         3.795e+14  3.399e+07   11165219   <2e-16 ***
## V4.12th                         1.604e+14  4.778e+07    3357530   <2e-16 ***
## V4.1st.4th                      3.935e+15  7.043e+07   55872561   <2e-16 ***
## V4.5th.6th                      2.675e+14  6.741e+07    3968388   <2e-16 ***
## V4.7th.8th                     -2.890e+15  2.205e+07 -131054997   <2e-16 ***
## V4.9th                          2.139e+14  6.740e+07    3173565   <2e-16 ***
## V4.Assoc.acdm                  -2.174e+14  8.499e+06  -25579171   <2e-16 ***
## V4.Assoc.voc                   -1.686e+14  7.782e+06  -21669281   <2e-16 ***
## V4.Bachelors                   -2.940e+14  4.808e+06  -61144356   <2e-16 ***
## V4.Doctorate                   -5.592e+14  6.678e+06  -83742625   <2e-16 ***
## V4.HS.grad                     -1.217e+13  5.804e+06   -2096845   <2e-16 ***
## V4.Masters                     -2.672e+14  5.283e+06  -50580689   <2e-16 ***
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                 -4.472e+14  6.126e+06  -73005397   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                    -3.346e+15  7.779e+07  -43005463   <2e-16 ***
## V6.Married.AF.spouse           -7.656e+15  1.287e+08  -59483241   <2e-16 ***
## V6.Married.civ.spouse          -7.937e+15  1.099e+08  -72246183   <2e-16 ***
## V6.Married.spouse.absent               NA         NA         NA       NA    
## V6.Never.married                       NA         NA         NA       NA    
## V6.Separated                           NA         NA         NA       NA    
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                 2.775e+14  1.490e+07   18622394   <2e-16 ***
## V7.Armed.Forces                 4.830e+14  6.801e+07    7102466   <2e-16 ***
## V7.Craft.repair                 1.394e+14  9.570e+06   14563871   <2e-16 ***
## V7.Exec.managerial              1.402e+14  8.628e+06   16251575   <2e-16 ***
## V7.Farming.fishing              1.033e+13  1.143e+07     904262   <2e-16 ***
## V7.Handlers.cleaners            1.423e+15  2.971e+07   47892601   <2e-16 ***
## V7.Machine.op.inspct           -1.784e+14  1.854e+07   -9619362   <2e-16 ***
## V7.Other.service               -2.821e+14  4.083e+07   -6908544   <2e-16 ***
## V7.Priv.house.serv                     NA         NA         NA       NA    
## V7.Prof.specialty               1.782e+14  8.885e+06   20054412   <2e-16 ***
## V7.Protective.serv              1.130e+14  1.145e+07    9864136   <2e-16 ***
## V7.Sales                        2.030e+14  9.085e+06   22338096   <2e-16 ***
## V7.Tech.support                -6.755e+14  1.261e+07  -53579240   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                      3.353e+15  3.040e+07  110266952   <2e-16 ***
## V8.Not.in.family               -3.937e+15  8.131e+07  -48422559   <2e-16 ***
## V8.Other.relative                      NA         NA         NA       NA    
## V8.Own.child                           NA         NA         NA       NA    
## V8.Unmarried                           NA         NA         NA       NA    
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo          -3.229e+15  3.367e+07  -95892048   <2e-16 ***
## V9.Asian.Pac.Islander          -8.611e+14  1.140e+07  -75562321   <2e-16 ***
## V9.Black                        2.648e+13  1.093e+07    2421904   <2e-16 ***
## V9.Other                       -1.473e+15  2.856e+07  -51592105   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                             NA         NA         NA       NA    
## V10.Male                               NA         NA         NA       NA    
## V11                             1.131e+09  6.370e+01   17761773   <2e-16 ***
## V12                             9.589e+09  1.708e+03    5614454   <2e-16 ***
## V13                            -6.129e+12  1.068e+05  -57403398   <2e-16 ***
## V14..                           1.077e+15  3.480e+07   30942881   <2e-16 ***
## V14.Cambodia                           NA         NA         NA       NA    
## V14.Canada                      1.643e+15  3.705e+07   44334024   <2e-16 ***
## V14.China                       2.799e+15  4.063e+07   68886260   <2e-16 ***
## V14.Columbia                    2.572e+15  7.550e+07   34070629   <2e-16 ***
## V14.Cuba                        2.279e+15  4.070e+07   55992207   <2e-16 ***
## V14.Dominican.Republic                 NA         NA         NA       NA    
## V14.Ecuador                     2.214e+15  5.850e+07   37848677   <2e-16 ***
## V14.El.Salvador                 2.291e+15  4.539e+07   50479266   <2e-16 ***
## V14.England                     1.332e+15  3.815e+07   34907181   <2e-16 ***
## V14.France                     -8.320e+14  4.361e+07  -19078414   <2e-16 ***
## V14.Germany                     2.322e+15  3.748e+07   61964232   <2e-16 ***
## V14.Greece                      2.759e+15  4.388e+07   62885410   <2e-16 ***
## V14.Guatemala                          NA         NA         NA       NA    
## V14.Haiti                              NA         NA         NA       NA    
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                           NA         NA         NA       NA    
## V14.Hong                        3.116e+15  5.960e+07   52279623   <2e-16 ***
## V14.Hungary                     2.274e+15  5.867e+07   38752454   <2e-16 ***
## V14.India                       2.318e+15  3.763e+07   61587443   <2e-16 ***
## V14.Iran                        7.275e+14  4.039e+07   18010602   <2e-16 ***
## V14.Ireland                     2.248e+15  5.845e+07   38459941   <2e-16 ***
## V14.Italy                      -1.585e+15  3.971e+07  -39911065   <2e-16 ***
## V14.Jamaica                     2.511e+15  7.623e+07   32936114   <2e-16 ***
## V14.Japan                      -5.501e+14  4.007e+07  -13727127   <2e-16 ***
## V14.Laos                               NA         NA         NA       NA    
## V14.Mexico                      1.156e+15  4.152e+07   27838314   <2e-16 ***
## V14.Nicaragua                          NA         NA         NA       NA    
## V14.Outlying.US.Guam.USVI.etc.         NA         NA         NA       NA    
## V14.Peru                        1.984e+15  7.533e+07   26338332   <2e-16 ***
## V14.Philippines                 2.475e+15  3.974e+07   62284837   <2e-16 ***
## V14.Poland                     -7.120e+14  5.181e+07  -13743542   <2e-16 ***
## V14.Portugal                    2.554e+15  7.551e+07   33828344   <2e-16 ***
## V14.Puerto.Rico                 4.801e+14  4.839e+07    9920182   <2e-16 ***
## V14.Scotland                    2.573e+15  7.537e+07   34140859   <2e-16 ***
## V14.South                       1.221e+15  4.399e+07   27749907   <2e-16 ***
## V14.Taiwan                      2.812e+15  3.967e+07   70887541   <2e-16 ***
## V14.Thailand                    8.287e+14  5.954e+07   13917700   <2e-16 ***
## V14.Trinadad.Tobago                    NA         NA         NA       NA    
## V14.United.States               2.101e+15  3.396e+07   61863098   <2e-16 ***
## V14.Vietnam                     3.606e+15  7.643e+07   47179570   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  304.8  on 3372  degrees of freedom
## Residual deviance: 1874.3  on 3295  degrees of freedom
## AIC: 2030.3
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot (50 features requested) for the node-1 TDA-assisted
# fit. The title previously misspelled "Assisted".
vip(Adult_TDA_PC_5.40.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n1_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.40.5_n1_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.40.5_n1_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.40.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     98    20
##      >50K    7318  2332
##                                           
##                Accuracy : 0.2488          
##                  95% CI : (0.2402, 0.2575)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0023          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.01321         
##             Specificity : 0.99150         
##          Pos Pred Value : 0.83051         
##          Neg Pred Value : 0.24166         
##              Prevalence : 0.75921         
##          Detection Rate : 0.01003         
##    Detection Prevalence : 0.01208         
##       Balanced Accuracy : 0.50236         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-1 TDA-assisted LR. The confusion
# matrix itself is printed once, right after it is computed, so it is not
# printed again here. Accuracy is selected by name rather than by position.
ad_tda_pc_5.40.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.248771499    0.002287689    0.240223237    0.257469794    0.759213759 
## AccuracyPValue  McnemarPValue 
##    1.000000000    0.000000000
ad_tda_pc_5.40.5_n1_lr_cf0_ov_acc <- ad_tda_pc_5.40.5_n1_lr_cf0$overall["Accuracy"]
# Per-class statistics; precision, recall and F1 are selected by name so the
# extraction does not depend on the ordering of the byClass vector.
ad_tda_pc_5.40.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.01321467           0.99149660           0.83050847 
##       Neg Pred Value            Precision               Recall 
##           0.24165803           0.83050847           0.01321467 
##                   F1           Prevalence       Detection Rate 
##           0.02601540           0.75921376           0.01003276 
## Detection Prevalence    Balanced Accuracy 
##           0.01208026           0.50235563
ad_tda_pc_5.40.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_lr_fit_re` (presumably the baseline LR
# resample accuracies -- confirm) minus the node-1 TDA-assisted accuracies.
diff_tda_pca_5.40.5_lr_n1_3_fold <- ad_lr_fit_re - ad_tda_pc_5.40.5_n1_lr_fit_re
diff_tda_pca_5.40.5_lr_n1_3_fold
##      Accuracy
## 1 -0.07484437
## 2 -0.13344072
## 3 -0.13974623
## Bayesian tests on the 3-fold differences

# Bayesian Sign Test, called with the bounds -0.01 and 0.01.

bst_tda_pca_5.40.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" vs. "right" from the Bayesian Sign Test. The ratio is Inf
# whenever probRight is 0, as in the recorded output below.

bst_tda_pca_5.40.5_lr.n1_3_fold_odds.left <-
  bst_tda_pca_5.40.5_lr.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n1_3_fold[["probRight"]]
bst_tda_pca_5.40.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the same fold-wise differences and bounds.

bsr_tda_pca_5.40.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n1_3_fold
## $winLeft
## [1] 0.9915
## 
## $winRope
## [1] 0.0085
## 
## $winRight
## [1] 0
# Bayesian Correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho; the fits here report 3-fold cross-validation, for which the usual
# n_test / n_total heuristic would give 1/3 -- confirm the intended value.

bct_tda_pca_5.40.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n1_3_fold
## $left
## [1] 0.9764466
## 
## $rope
## [1] 0.006536675
## 
## $right
## [1] 0.01701672
# ROPE plot of the fold-wise differences over the range [-0.01, 0.01].
plot(rope(diff_tda_pca_5.40.5_lr_n1_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold))
#bf_tda_pca_5.40.5_lr.n1_3_fold

# Frequentist one-sample t-test of the differences (default settings).
t.test(as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold)
## t = -5.6143, df = 2, p-value = 0.03029
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.20491772 -0.02710317
## sample estimates:
##  mean of x 
## -0.1160104
### Test set diff
# Test-set accuracy of the baseline LR minus that of the node-1 TDA-assisted LR.
# NOTE(review): this is a single number, so the tests that follow are run on
# one observation only -- confirm that this is intended.
diff_tda_pca_5.40.5_lr.n1_test <- lr_cf_ov_acc - ad_tda_pc_5.40.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n1_test
## Accuracy 
## 0.604525
## Bayesian tests on the test-set difference

# Bayesian Sign Test, called with the bounds -0.01 and 0.01.

bst_tda_pca_5.40.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" vs. "right" from the test-set Bayesian Sign Test.

bst_tda_pca_5.40.5_lr.n1_test_odds.left <-
  bst_tda_pca_5.40.5_lr.n1_test[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n1_test[["probRight"]]
bst_tda_pca_5.40.5_lr.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference, same bounds.

bsr_tda_pca_5.40.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1634
## 
## $winRight
## [1] 0.8366
# Bayesian Correlated t-test on the test-set difference.
# NOTE(review): the input holds a single value and every component came back
# NA in the recorded output below -- presumably because the test needs more
# than one observation; confirm.

bct_tda_pca_5.40.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n1_test)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n1_test)) #bf_tda_pca_5.40.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n1_test))

## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names use "5.40.5" -- confirm whether the overlap
## is 40% or 50%.
## Node2

# Fit the node-2 logistic regression through caret, using the resampling setup
# in `fitControl` and Accuracy as the summary metric.
Adult_TDA_PC_5.40.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  method = "glm",
  family = "binomial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit (resampling scheme plus averaged accuracy and kappa).
Adult_TDA_PC_5.40.5_n2_LrFit0
## Generalized Linear Model 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6850, 6851, 6851 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.6709772  0.2382677
# Per-fold metrics; only the Accuracy column (one-column data frame) is kept
# for the fold-wise comparison against the baseline model.
Adult_TDA_PC_5.40.5_n2_LrFit0[["resample"]]
##    Accuracy        Kappa Resample
## 1 0.7092820  0.373101325    Fold1
## 2 0.6072993 -0.004327221    Fold2
## 3 0.6963504  0.346028916    Fold3
ad_tda_pc_5.40.5_n2_lr_fit_re <- Adult_TDA_PC_5.40.5_n2_LrFit0[["resample"]]["Accuracy"]

# Coefficient table and deviance summary of the fitted model.
summary(Adult_TDA_PC_5.40.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (12 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -4.407e+13  2.407e+13  -1.831 0.067089 .  
## V1                             -6.101e-04  2.228e-03  -0.274 0.784239    
## V2..                            3.822e+13  1.810e+13   2.112 0.034710 *  
## V2.Federal.gov                  3.822e+13  1.810e+13   2.112 0.034709 *  
## V2.Local.gov                    3.822e+13  1.810e+13   2.112 0.034710 *  
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                      3.822e+13  1.810e+13   2.112 0.034710 *  
## V2.Self.emp.inc                 3.822e+13  1.810e+13   2.112 0.034710 *  
## V2.Self.emp.not.inc             3.822e+13  1.810e+13   2.112 0.034710 *  
## V2.State.gov                    3.822e+13  1.810e+13   2.112 0.034709 *  
## V2.Without.pay                  3.822e+13  1.810e+13   2.112 0.034709 *  
## V3                              1.216e-06  2.386e-07   5.097 3.45e-07 ***
## V4.10th                        -1.493e-01  2.146e-01  -0.696 0.486528    
## V4.11th                        -5.168e-02  2.543e-01  -0.203 0.838952    
## V4.12th                         7.819e-02  3.367e-01   0.232 0.816335    
## V4.1st.4th                      6.884e-01  8.863e-01   0.777 0.437346    
## V4.5th.6th                      3.228e-01  4.838e-01   0.667 0.504607    
## V4.7th.8th                     -6.676e-01  2.264e-01  -2.949 0.003191 ** 
## V4.9th                         -3.572e-01  3.713e-01  -0.962 0.335925    
## V4.Assoc.acdm                  -2.324e-01  1.296e-01  -1.793 0.072994 .  
## V4.Assoc.voc                   -2.598e-01  1.092e-01  -2.379 0.017347 *  
## V4.Bachelors                    2.593e-01  7.265e-02   3.568 0.000359 ***
## V4.Doctorate                    9.387e-01  1.968e-01   4.769 1.85e-06 ***
## V4.HS.grad                     -2.324e-01  6.665e-02  -3.487 0.000489 ***
## V4.Masters                      6.936e-01  1.053e-01   6.586 4.52e-11 ***
## V4.Preschool                           NA         NA      NA       NA    
## V4.Prof.school                  6.451e-01  1.746e-01   3.696 0.000219 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     5.857e+12  8.219e+12   0.713 0.476116    
## V6.Married.AF.spouse            5.857e+12  8.219e+12   0.713 0.476116    
## V6.Married.civ.spouse           5.857e+12  8.219e+12   0.713 0.476116    
## V6.Married.spouse.absent        5.857e+12  8.219e+12   0.713 0.476116    
## V6.Never.married                5.857e+12  8.219e+12   0.713 0.476116    
## V6.Separated                    5.857e+12  8.219e+12   0.713 0.476116    
## V6.Widowed                      5.857e+12  8.219e+12   0.713 0.476116    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 1.174e+00  1.622e-01   7.241 4.45e-13 ***
## V7.Armed.Forces                 2.544e+01  4.547e+05   0.000 0.999955    
## V7.Craft.repair                 5.911e-02  9.923e-02   0.596 0.551370    
## V7.Exec.managerial              8.265e-01  1.031e-01   8.018 1.08e-15 ***
## V7.Farming.fishing             -5.710e-01  1.568e-01  -3.641 0.000272 ***
## V7.Handlers.cleaners            7.427e-01  2.249e-01   3.302 0.000959 ***
## V7.Machine.op.inspct            9.437e-01  1.499e-01   6.295 3.08e-10 ***
## V7.Other.service                6.674e-01  2.170e-01   3.075 0.002106 ** 
## V7.Priv.house.serv                     NA         NA      NA       NA    
## V7.Prof.specialty               6.226e-01  1.134e-01   5.491 4.00e-08 ***
## V7.Protective.serv              6.205e-01  1.546e-01   4.013 5.99e-05 ***
## V7.Sales                        6.215e-01  1.098e-01   5.659 1.52e-08 ***
## V7.Tech.support                 1.051e+00  1.603e-01   6.557 5.49e-11 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.759e+01  6.707e+03  -0.003 0.997907    
## V8.Not.in.family               -1.569e+01  6.707e+03  -0.002 0.998134    
## V8.Other.relative              -1.640e+01  6.707e+03  -0.002 0.998049    
## V8.Own.child                   -1.621e+01  6.707e+03  -0.002 0.998072    
## V8.Unmarried                    3.895e+00  1.423e+04   0.000 0.999782    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           8.385e-02  3.942e-01   0.213 0.831545    
## V9.Asian.Pac.Islander           3.760e-01  2.189e-01   1.718 0.085858 .  
## V9.Black                        1.428e+00  1.805e-01   7.911 2.55e-15 ***
## V9.Other                        6.009e-01  5.236e-01   1.148 0.251056    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      2.021e+01  2.705e+04   0.001 0.999404    
## V10.Male                               NA         NA      NA       NA    
## V11                             2.628e-04  1.505e-05  17.457  < 2e-16 ***
## V12                             4.563e-04  4.835e-05   9.437  < 2e-16 ***
## V13                             9.090e-03  2.134e-03   4.261 2.04e-05 ***
## V14..                          -4.094e-01  7.547e-01  -0.542 0.587511    
## V14.Cambodia                    2.704e+01  1.601e+05   0.000 0.999865    
## V14.Canada                      1.954e-01  8.033e-01   0.243 0.807788    
## V14.China                      -1.112e+00  8.743e-01  -1.272 0.203343    
## V14.Columbia                   -1.936e+00  1.371e+00  -1.412 0.157969    
## V14.Cuba                        6.579e-01  8.684e-01   0.758 0.448730    
## V14.Dominican.Republic         -2.712e+01  3.323e+05   0.000 0.999935    
## V14.Ecuador                     5.720e-01  1.379e+00   0.415 0.678398    
## V14.El.Salvador                 7.624e-01  1.174e+00   0.649 0.516140    
## V14.England                     2.200e-01  8.443e-01   0.261 0.794425    
## V14.France                      3.260e-01  1.048e+00   0.311 0.755834    
## V14.Germany                     3.628e-01  8.106e-01   0.448 0.654406    
## V14.Greece                     -1.226e+00  9.702e-01  -1.263 0.206489    
## V14.Guatemala                   2.391e+01  4.279e+05   0.000 0.999955    
## V14.Haiti                      -1.593e+00  1.466e+00  -1.086 0.277416    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                   -3.123e+01  2.787e+04  -0.001 0.999106    
## V14.Hong                       -1.171e-01  1.177e+00  -0.099 0.920777    
## V14.Hungary                     6.350e-03  1.234e+00   0.005 0.995894    
## V14.India                      -1.125e+00  8.265e-01  -1.361 0.173578    
## V14.Iran                       -3.755e-01  8.938e-01  -0.420 0.674411    
## V14.Ireland                     2.622e+01  1.848e+05   0.000 0.999887    
## V14.Italy                       1.385e-01  8.353e-01   0.166 0.868285    
## V14.Jamaica                    -5.475e-01  1.136e+00  -0.482 0.629679    
## V14.Japan                      -5.785e-01  8.783e-01  -0.659 0.510150    
## V14.Laos                        2.575e+01  4.859e+05   0.000 0.999958    
## V14.Mexico                      3.309e-01  8.242e-01   0.401 0.688069    
## V14.Nicaragua                  -9.381e-02  1.611e+00  -0.058 0.953576    
## V14.Outlying.US.Guam.USVI.etc. -2.516e+01  2.619e+05   0.000 0.999923    
## V14.Peru                        8.124e-01  1.637e+00   0.496 0.619710    
## V14.Philippines                 7.155e-01  8.472e-01   0.845 0.398352    
## V14.Poland                     -1.016e-01  8.876e-01  -0.115 0.908840    
## V14.Portugal                   -1.716e-01  1.230e+00  -0.139 0.889116    
## V14.Puerto.Rico                 6.469e-01  1.148e+00   0.563 0.573193    
## V14.Scotland                    2.685e+01  2.979e+05   0.000 0.999928    
## V14.South                      -7.416e-01  9.084e-01  -0.816 0.414247    
## V14.Taiwan                     -7.607e-01  9.396e-01  -0.810 0.418185    
## V14.Thailand                   -3.482e-01  1.454e+00  -0.240 0.810683    
## V14.Trinadad.Tobago             2.602e+01  2.845e+05   0.000 0.999927    
## V14.United.States               1.120e-01  7.348e-01   0.152 0.878850    
## V14.Vietnam                    -1.051e+00  1.183e+00  -0.888 0.374299    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 13736  on 10275  degrees of freedom
## Residual deviance: 11002  on 10179  degrees of freedom
## AIC: 11196
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot (50 features requested) for the node-2 TDA-assisted
# fit. The title previously misspelled "Assisted".
vip(Adult_TDA_PC_5.40.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n2_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.40.5_n2_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.40.5_n2_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.40.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K    984   360
##      >50K    6432  1992
##                                           
##                Accuracy : 0.3047          
##                  95% CI : (0.2955, 0.3139)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0108         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1327          
##             Specificity : 0.8469          
##          Pos Pred Value : 0.7321          
##          Neg Pred Value : 0.2365          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1007          
##    Detection Prevalence : 0.1376          
##       Balanced Accuracy : 0.4898          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-2 TDA-assisted LR. The confusion
# matrix itself is printed once, right after it is computed, so it is not
# printed again here. Accuracy is selected by name rather than by position.
ad_tda_pc_5.40.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.3046683     -0.0108296      0.2955489      0.3139043      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
ad_tda_pc_5.40.5_n2_lr_cf0_ov_acc <- ad_tda_pc_5.40.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics; precision, recall and F1 are selected by name so the
# extraction does not depend on the ordering of the byClass vector.
ad_tda_pc_5.40.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.1326861            0.8469388            0.7321429 
##       Neg Pred Value            Precision               Recall 
##            0.2364672            0.7321429            0.1326861 
##                   F1           Prevalence       Detection Rate 
##            0.2246575            0.7592138            0.1007371 
## Detection Prevalence    Balanced Accuracy 
##            0.1375921            0.4898124
ad_tda_pc_5.40.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_lr_fit_re` (presumably the baseline LR
# resample accuracies -- confirm) minus the node-2 TDA-assisted accuracies.
diff_tda_pca_5.40.5_lr_n2_3_fold <- ad_lr_fit_re - ad_tda_pc_5.40.5_n2_lr_fit_re
diff_tda_pca_5.40.5_lr_n2_3_fold
##    Accuracy
## 1 0.1376514
## 2 0.2485839
## 3 0.1541169
## Bayesian tests on the 3-fold differences

# Bayesian Sign Test, called with the bounds -0.01 and 0.01.

bst_tda_pca_5.40.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" vs. "right" from the Bayesian Sign Test.

bst_tda_pca_5.40.5_lr.n2_3_fold_odds.left <-
  bst_tda_pca_5.40.5_lr.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n2_3_fold[["probRight"]]
bst_tda_pca_5.40.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same fold-wise differences and bounds.

bsr_tda_pca_5.40.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0091
## 
## $winRight
## [1] 0.9909
# Bayesian Correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho; the fits here report 3-fold cross-validation, for which the usual
# n_test / n_total heuristic would give 1/3 -- confirm the intended value.

bct_tda_pca_5.40.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n2_3_fold
## $left
## [1] 0.02067505
## 
## $rope
## [1] 0.004760408
## 
## $right
## [1] 0.9745645
# ROPE plot of the fold-wise differences over the range [-0.01, 0.01].
plot(rope(diff_tda_pca_5.40.5_lr_n2_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold))
#bf_tda_pca_5.40.5_lr.n2_3_fold

# Frequentist one-sample t-test of the differences (default settings).
t.test(as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold)
## t = 5.2115, df = 2, p-value = 0.0349
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.03141071 0.32882410
## sample estimates:
## mean of x 
## 0.1801174
### Test set diff
# Test-set accuracy gap: `lr_cf_ov_acc` minus the n2 TDA-assisted LR accuracy.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_lr.n2_test <- lr_cf_ov_acc - ad_tda_pc_5.40.5_n2_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n2_test
##  Accuracy 
## 0.5486282
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference.
# NOTE(review): with one observation the 0.5/0.5 output is presumably
# dominated by the prior -- interpret with care.
bst_tda_pca_5.40.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.40.5_lr.n2_test_odds.left <-
  bst_tda_pca_5.40.5_lr.n2_test[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n2_test[["probRight"]]
bst_tda_pca_5.40.5_lr.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.40.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1561667
## 
## $winRight
## [1] 0.8438333
# Correlated Bayesian t-test on the single test-set difference.
# The recorded result is NA for left/rope/right, presumably because one
# observation has no sample variance -- confirm.
bct_tda_pca_5.40.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n2_test)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n2_test)) #bf_tda_pca_5.40.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n2_test))

##Node3

# Logistic regression (glm, binomial family) fitted via train() on the Node3
# data, with resampling controlled by `fitControl` (defined elsewhere) and
# model selection on Accuracy.
Adult_TDA_PC_5.40.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
# The recorded warnings below report non-convergence, fitted probabilities
# of 0/1, and rank-deficient predictions.
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampling summary of the fitted model.
Adult_TDA_PC_5.40.5_n3_LrFit0
## Generalized Linear Model 
## 
## 11563 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7708, 7708, 7710 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8642225  0.5365206
# Per-fold Accuracy/Kappa of the Node3 fit.
Adult_TDA_PC_5.40.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8666667 0.5433624    Fold1
## 2 0.8594034 0.5229379    Fold2
## 3 0.8665975 0.5432615    Fold3
# Keep the fold-wise Accuracy column of the Node3 fit for the 3-fold tests.
# FIX: this previously read from Adult_TDA_PC_5.40.5_n2_LrFit0 (copy-paste from
# Node2), so the n3 3-fold comparison silently re-tested the n2 model.
# NOTE: the recorded n3 3-fold outputs further down were produced from the n2
# resamples (they match the n2 section exactly) and must be regenerated.
ad_tda_pc_5.40.5_n3_lr_fit_re <- Adult_TDA_PC_5.40.5_n3_LrFit0$resample[1]

# Coefficient table and fit diagnostics of the final Node3 model.
summary(Adult_TDA_PC_5.40.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     1.803e+13  1.163e+13   1.550 0.121182    
## V1                             -5.397e-03  2.909e-03  -1.855 0.063578 .  
## V2..                           -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.Federal.gov                 -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.Local.gov                   -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                     -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.Self.emp.inc                -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.Self.emp.not.inc            -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.State.gov                   -1.803e+13  1.163e+13  -1.550 0.121182    
## V2.Without.pay                 -1.803e+13  1.163e+13  -1.550 0.121182    
## V3                              1.227e-06  2.921e-07   4.200 2.67e-05 ***
## V4.10th                        -2.850e-02  2.121e-01  -0.134 0.893110    
## V4.11th                         1.232e-01  2.030e-01   0.607 0.543848    
## V4.12th                         3.389e-01  3.088e-01   1.098 0.272372    
## V4.1st.4th                     -6.829e-01  5.521e-01  -1.237 0.216163    
## V4.5th.6th                     -8.191e-01  4.082e-01  -2.006 0.044805 *  
## V4.7th.8th                     -1.211e+00  3.186e-01  -3.801 0.000144 ***
## V4.9th                         -6.215e-01  2.876e-01  -2.161 0.030696 *  
## V4.Assoc.acdm                  -7.835e-01  1.808e-01  -4.334 1.46e-05 ***
## V4.Assoc.voc                   -5.034e-01  1.610e-01  -3.126 0.001773 ** 
## V4.Bachelors                   -8.702e-01  1.063e-01  -8.187 2.67e-16 ***
## V4.Doctorate                   -3.791e-01  2.429e-01  -1.561 0.118492    
## V4.HS.grad                     -4.174e-01  8.962e-02  -4.658 3.20e-06 ***
## V4.Masters                     -6.383e-01  1.401e-01  -4.556 5.21e-06 ***
## V4.Preschool                   -2.422e+01  8.346e+04   0.000 0.999768    
## V4.Prof.school                 -3.844e-01  2.228e-01  -1.725 0.084507 .  
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -5.764e-01  2.263e-01  -2.547 0.010852 *  
## V6.Married.AF.spouse            1.472e+00  9.413e-01   1.564 0.117770    
## V6.Married.civ.spouse          -8.652e-01  3.720e-01  -2.326 0.020018 *  
## V6.Married.spouse.absent       -3.209e-01  3.452e-01  -0.930 0.352478    
## V6.Never.married               -3.522e-01  2.374e-01  -1.484 0.137884    
## V6.Separated                   -4.518e-01  2.966e-01  -1.523 0.127674    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 1.802e+00  1.740e-01  10.354  < 2e-16 ***
## V7.Armed.Forces                -2.462e+01  1.927e+05   0.000 0.999898    
## V7.Craft.repair                 1.711e-02  1.639e-01   0.104 0.916863    
## V7.Exec.managerial              4.203e-01  1.637e-01   2.568 0.010219 *  
## V7.Farming.fishing             -1.183e+00  3.599e-01  -3.287 0.001013 ** 
## V7.Handlers.cleaners            9.947e-01  2.056e-01   4.838 1.31e-06 ***
## V7.Machine.op.inspct            1.095e+00  1.749e-01   6.260 3.84e-10 ***
## V7.Other.service                8.387e-01  1.956e-01   4.288 1.81e-05 ***
## V7.Priv.house.serv             -2.417e+00  1.108e+01  -0.218 0.827298    
## V7.Prof.specialty               3.677e-01  1.701e-01   2.162 0.030602 *  
## V7.Protective.serv              4.760e-01  2.474e-01   1.924 0.054361 .  
## V7.Sales                        7.874e-01  1.681e-01   4.683 2.82e-06 ***
## V7.Tech.support                 9.623e-01  2.226e-01   4.323 1.54e-05 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -2.068e+00  1.603e-01 -12.907  < 2e-16 ***
## V8.Not.in.family               -1.824e-01  3.293e-01  -0.554 0.579636    
## V8.Other.relative              -6.772e-01  3.077e-01  -2.200 0.027772 *  
## V8.Own.child                   -6.956e-01  3.208e-01  -2.169 0.030104 *  
## V8.Unmarried                    1.441e-01  3.491e-01   0.413 0.679685    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           8.046e-01  3.025e-01   2.660 0.007817 ** 
## V9.Asian.Pac.Islander           9.112e-01  2.493e-01   3.656 0.000257 ***
## V9.Black                        1.430e+00  1.111e-01  12.868  < 2e-16 ***
## V9.Other                        9.549e-01  3.565e-01   2.679 0.007388 ** 
## V9.White                               NA         NA      NA       NA    
## V10.Female                      1.851e+00  1.321e-01  14.010  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             2.780e-04  1.688e-05  16.471  < 2e-16 ***
## V12                             2.364e-04  6.185e-05   3.822 0.000132 ***
## V13                            -3.632e-03  2.811e-03  -1.292 0.196316    
## V14..                          -1.719e+00  1.009e+00  -1.704 0.088314 .  
## V14.Cambodia                   -1.515e-01  1.253e+00  -0.121 0.903803    
## V14.Canada                     -1.618e+00  1.087e+00  -1.489 0.136590    
## V14.China                      -2.803e+00  1.173e+00  -2.389 0.016883 *  
## V14.Columbia                   -2.356e+00  1.475e+00  -1.597 0.110170    
## V14.Cuba                       -5.173e-01  1.097e+00  -0.472 0.637207    
## V14.Dominican.Republic         -3.042e+00  1.545e+00  -1.969 0.048975 *  
## V14.Ecuador                    -1.109e+00  1.474e+00  -0.753 0.451689    
## V14.El.Salvador                -1.602e+00  1.218e+00  -1.315 0.188578    
## V14.England                    -1.419e+00  1.096e+00  -1.295 0.195293    
## V14.France                     -1.199e+00  1.294e+00  -0.927 0.354138    
## V14.Germany                    -1.254e+00  1.086e+00  -1.155 0.248162    
## V14.Greece                     -3.113e+00  1.389e+00  -2.242 0.024975 *  
## V14.Guatemala                  -1.501e+00  1.305e+00  -1.150 0.250119    
## V14.Haiti                      -6.966e-01  1.222e+00  -0.570 0.568568    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                    2.428e+01  4.019e+05   0.000 0.999952    
## V14.Hong                       -1.949e+00  1.851e+00  -1.053 0.292456    
## V14.Hungary                    -2.239e+00  1.578e+00  -1.419 0.155853    
## V14.India                      -2.462e+00  1.131e+00  -2.176 0.029532 *  
## V14.Iran                       -2.686e+00  1.278e+00  -2.102 0.035544 *  
## V14.Ireland                    -9.362e-01  1.445e+00  -0.648 0.517014    
## V14.Italy                      -9.952e-01  1.135e+00  -0.877 0.380494    
## V14.Jamaica                    -5.144e-01  1.116e+00  -0.461 0.644966    
## V14.Japan                      -1.422e+00  1.207e+00  -1.177 0.239045    
## V14.Laos                       -2.942e+00  1.689e+00  -1.742 0.081514 .  
## V14.Mexico                     -1.828e+00  1.024e+00  -1.785 0.074325 .  
## V14.Nicaragua                  -2.240e+00  1.555e+00  -1.441 0.149604    
## V14.Outlying.US.Guam.USVI.etc. -2.554e+01  1.673e+05   0.000 0.999878    
## V14.Peru                       -9.369e-01  1.485e+00  -0.631 0.528212    
## V14.Philippines                -9.813e-01  1.056e+00  -0.929 0.352967    
## V14.Poland                     -8.324e-01  1.203e+00  -0.692 0.489025    
## V14.Portugal                   -1.973e+00  1.584e+00  -1.245 0.212966    
## V14.Puerto.Rico                -1.126e+00  1.106e+00  -1.019 0.308382    
## V14.Scotland                   -3.174e+00  1.876e+00  -1.692 0.090634 .  
## V14.South                      -2.803e+00  1.192e+00  -2.351 0.018708 *  
## V14.Taiwan                     -1.450e+00  1.248e+00  -1.161 0.245556    
## V14.Thailand                   -2.264e+00  1.670e+00  -1.356 0.175204    
## V14.Trinadad.Tobago            -1.254e+00  1.312e+00  -0.955 0.339442    
## V14.United.States              -1.480e+00  9.856e-01  -1.502 0.133181    
## V14.Vietnam                    -2.058e+00  1.241e+00  -1.659 0.097165 .  
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 11831.9  on 11562  degrees of freedom
## Residual deviance:  7317.2  on 11465  degrees of freedom
## AIC: 7513.2
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node3 fit; 50 is vip()'s second argument
# (presumably the number of features to display -- confirm).
# NOTE(review): the title reads "Lr1Fit" / "Assited"; string left unchanged.
vip(
  Adult_TDA_PC_5.40.5_n3_LrFit0,
  50
) +
  ggtitle("Adult_TDA_PCA_5.40.5_n3_Lr1Fit TDA-Assited LR")

# Score the held-out test set with the Node3 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the test labels.
# NOTE(review): recorded test accuracy (0.4532) is far below both the CV
# accuracy (0.8642) and the no-information rate (0.7592) -- worth investigating.
ad_tda_pc_5.40.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3896  1821
##      >50K    3520   531
##                                           
##                Accuracy : 0.4532          
##                  95% CI : (0.4433, 0.4632)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1996         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5254          
##             Specificity : 0.2258          
##          Pos Pred Value : 0.6815          
##          Neg Pred Value : 0.1311          
##              Prevalence : 0.7592          
##          Detection Rate : 0.3989          
##    Detection Prevalence : 0.5853          
##       Balanced Accuracy : 0.3756          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time (output is identical to the print above).
ad_tda_pc_5.40.5_n3_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3896  1821
##      >50K    3520   531
##                                           
##                Accuracy : 0.4532          
##                  95% CI : (0.4433, 0.4632)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1996         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5254          
##             Specificity : 0.2258          
##          Pos Pred Value : 0.6815          
##          Neg Pred Value : 0.1311          
##              Prevalence : 0.7592          
##          Detection Rate : 0.3989          
##    Detection Prevalence : 0.5853          
##       Balanced Accuracy : 0.3756          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the Node3 confusion matrix.
ad_tda_pc_5.40.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.532146e-01  -1.996451e-01   4.433066e-01   4.631505e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  2.058531e-119
# Keep the (named) overall test accuracy -- the first entry printed above.
ad_tda_pc_5.40.5_n3_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_lr_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the Node3 confusion matrix.
ad_tda_pc_5.40.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.5253506            0.2257653            0.6814763 
##       Neg Pred Value            Precision               Recall 
##            0.1310787            0.6814763            0.5253506 
##                   F1           Prevalence       Detection Rate 
##            0.5933146            0.7592138            0.3988534 
## Detection Prevalence    Balanced Accuracy 
##            0.5852785            0.3755579
# Entries 5 to 7 of `byClass` are Precision, Recall and F1 (see the print above).
ad_tda_pc_5.40.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_lr_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy gap: `ad_lr_fit_re` minus the n3 TDA-assisted LR resamples.
# NOTE(review): the recorded values below are identical to the Node2 section.
# They reflect the n2 resamples, not n3 (whose folds were 0.8667, 0.8594 and
# 0.8666). Check how `ad_tda_pc_5.40.5_n3_lr_fit_re` is built and regenerate.
diff_tda_pca_5.40.5_lr_n3_3_fold <- ad_lr_fit_re - ad_tda_pc_5.40.5_n3_lr_fit_re
diff_tda_pca_5.40.5_lr_n3_3_fold
##    Accuracy
## 1 0.1376514
## 2 0.2485839
## 3 0.1541169
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold differences; -0.01 and 0.01 are passed
# as the 2nd/3rd arguments (presumably the ROPE bounds -- confirm).
bst_tda_pca_5.40.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Ratio of the sign test's left probability to its right probability.
# The result is Inf (or NaN) whenever probRight is 0.
bst_tda_pca_5.40.5_lr.n3_3_fold_odds.left <-
  bst_tda_pca_5.40.5_lr.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n3_3_fold[["probRight"]]
bst_tda_pca_5.40.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold differences, same bounds.
bsr_tda_pca_5.40.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0077
## 
## $winRight
## [1] 0.9923
# Correlated Bayesian t-test on the fold differences.
# NOTE(review): 0.1 is the 2nd argument (presumably the fold correlation rho).
# For 3-fold CV the usual heuristic would be 1/3 -- confirm the intended value.
bct_tda_pca_5.40.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n3_3_fold
## $left
## [1] 0.02067505
## 
## $rope
## [1] 0.004760408
## 
## $right
## [1] 0.9745645
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_lr_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (disabled)
#bf_tda_pca_5.40.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold))
#bf_tda_pca_5.40.5_lr.n3_3_fold

# Classical one-sample t-test of the fold differences against 0 (df = 2).
t.test(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold)
## t = 5.2115, df = 2, p-value = 0.0349
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.03141071 0.32882410
## sample estimates:
## mean of x 
## 0.1801174
### Test set diff
# Test-set accuracy gap: `lr_cf_ov_acc` minus the n3 TDA-assisted LR accuracy.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.40.5_lr.n3_test <- lr_cf_ov_acc - ad_tda_pc_5.40.5_n3_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n3_test
##  Accuracy 
## 0.4000819
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference.
# NOTE(review): with one observation the 0.5/0.5 output is presumably
# dominated by the prior -- interpret with care.
bst_tda_pca_5.40.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.40.5_lr.n3_test_odds.left <-
  bst_tda_pca_5.40.5_lr.n3_test[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n3_test[["probRight"]]
bst_tda_pca_5.40.5_lr.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.40.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1607333
## 
## $winRight
## [1] 0.8392667
# Correlated Bayesian t-test on the single test-set difference.
# The recorded result is NA for left/rope/right, presumably because one
# observation has no sample variance -- confirm.
bct_tda_pca_5.40.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n3_test)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n3_test)) #bf_tda_pca_5.40.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n3_test))

##Node4

# Logistic regression (glm, binomial family) fitted via train() on the Node4
# data, with resampling controlled by `fitControl` (defined elsewhere) and
# model selection on Accuracy.
Adult_TDA_PC_5.40.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
# The recorded warnings below report non-convergence, fitted probabilities
# of 0/1, and rank-deficient predictions.
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampling summary of the fitted model.
Adult_TDA_PC_5.40.5_n4_LrFit0
## Generalized Linear Model 
## 
## 14818 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9878, 9879, 9879 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.9437159  0.1719718
# Per-fold Accuracy/Kappa of the Node4 fit.
Adult_TDA_PC_5.40.5_n4_LrFit0$resample
##    Accuracy      Kappa Resample
## 1 0.9621457 0.29666063    Fold1
## 2 0.9591010 0.12915683    Fold2
## 3 0.9099008 0.09009785    Fold3
# Keep the fold-wise Accuracy column (first column above) as a one-column
# data frame for the 3-fold tests.
ad_tda_pc_5.40.5_n4_lr_fit_re <-
  Adult_TDA_PC_5.40.5_n4_LrFit0[["resample"]]["Accuracy"]

# Coefficient table and fit diagnostics of the final Node4 model.
summary(Adult_TDA_PC_5.40.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (10 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     6.412e+11  9.399e+12   0.068 0.945608    
## V1                              1.519e-02  4.377e-03   3.472 0.000517 ***
## V2..                           -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Federal.gov                 -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Local.gov                   -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Never.worked                -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Private                     -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Self.emp.inc                -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Self.emp.not.inc            -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.State.gov                   -6.412e+11  9.399e+12  -0.068 0.945608    
## V2.Without.pay                 -6.412e+11  9.399e+12  -0.068 0.945608    
## V3                              7.350e-07  4.291e-07   1.713 0.086754 .  
## V4.10th                        -1.605e+00  6.216e-01  -2.581 0.009839 ** 
## V4.11th                         4.964e-02  3.183e-01   0.156 0.876068    
## V4.12th                        -3.181e-01  5.001e-01  -0.636 0.524736    
## V4.1st.4th                     -2.254e+01  2.839e+04  -0.001 0.999367    
## V4.5th.6th                     -2.777e-01  6.642e-01  -0.418 0.675875    
## V4.7th.8th                     -9.178e-01  5.493e-01  -1.671 0.094740 .  
## V4.9th                          2.554e-01  4.222e-01   0.605 0.545301    
## V4.Assoc.acdm                  -2.233e-01  2.381e-01  -0.938 0.348265    
## V4.Assoc.voc                   -1.595e-01  2.252e-01  -0.708 0.478793    
## V4.Bachelors                    2.113e-01  1.442e-01   1.465 0.142957    
## V4.Doctorate                    8.018e-01  4.790e-01   1.674 0.094127 .  
## V4.HS.grad                     -3.442e-01  1.284e-01  -2.682 0.007325 ** 
## V4.Masters                      3.040e-01  2.173e-01   1.399 0.161788    
## V4.Preschool                   -4.478e+01  1.134e+07   0.000 0.999997    
## V4.Prof.school                  3.716e-02  5.252e-01   0.071 0.943592    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     1.835e-01  2.169e-01   0.846 0.397702    
## V6.Married.AF.spouse            3.061e+00  8.153e-01   3.755 0.000174 ***
## V6.Married.civ.spouse           1.740e+00  4.737e-01   3.674 0.000239 ***
## V6.Married.spouse.absent       -1.296e-01  4.366e-01  -0.297 0.766550    
## V6.Never.married                1.012e-01  2.414e-01   0.419 0.675094    
## V6.Separated                   -1.769e-01  3.169e-01  -0.558 0.576551    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 3.451e-01  3.334e-01   1.035 0.300586    
## V7.Armed.Forces                -2.248e+01  1.290e+05   0.000 0.999861    
## V7.Craft.repair                -3.170e-02  3.674e-01  -0.086 0.931258    
## V7.Exec.managerial              3.084e-01  3.419e-01   0.902 0.367074    
## V7.Farming.fishing             -2.030e+00  1.036e+00  -1.960 0.049999 *  
## V7.Handlers.cleaners           -2.751e-01  4.662e-01  -0.590 0.555156    
## V7.Machine.op.inspct           -6.153e-01  4.103e-01  -1.500 0.133726    
## V7.Other.service               -1.293e-02  3.505e-01  -0.037 0.970582    
## V7.Priv.house.serv             -3.929e+00  2.541e+00  -1.546 0.122043    
## V7.Prof.specialty               2.244e-01  3.471e-01   0.646 0.517985    
## V7.Protective.serv              4.390e-01  5.046e-01   0.870 0.384254    
## V7.Sales                        8.373e-02  3.482e-01   0.240 0.809977    
## V7.Tech.support                 6.557e-02  3.973e-01   0.165 0.868918    
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.930e+01  9.024e+02  -0.021 0.982935    
## V8.Not.in.family                5.919e-02  4.361e-01   0.136 0.892026    
## V8.Other.relative              -1.111e+00  4.414e-01  -2.517 0.011835 *  
## V8.Own.child                   -1.011e+00  4.228e-01  -2.390 0.016847 *  
## V8.Unmarried                    6.885e-02  4.485e-01   0.154 0.877994    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           7.619e-01  3.593e-01   2.121 0.033929 *  
## V9.Asian.Pac.Islander           8.478e-01  3.049e-01   2.781 0.005420 ** 
## V9.Black                        4.978e-01  1.444e-01   3.447 0.000567 ***
## V9.Other                        2.358e-01  5.227e-01   0.451 0.651914    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      9.009e-01  1.337e-01   6.737 1.61e-11 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.732e-04  1.941e-05  19.221  < 2e-16 ***
## V12                             3.180e-04  1.091e-04   2.915 0.003556 ** 
## V13                             2.336e-02  4.150e-03   5.629 1.81e-08 ***
## V14..                          -2.485e+00  1.370e+00  -1.814 0.069683 .  
## V14.Cambodia                   -2.263e+01  3.387e+04  -0.001 0.999467    
## V14.Canada                     -2.005e+00  1.468e+00  -1.365 0.172200    
## V14.China                      -2.234e+00  1.520e+00  -1.470 0.141654    
## V14.Columbia                   -2.491e+01  5.734e+04   0.000 0.999653    
## V14.Cuba                       -3.219e+00  1.691e+00  -1.904 0.056929 .  
## V14.Dominican.Republic         -2.280e+00  1.699e+00  -1.341 0.179766    
## V14.Ecuador                    -2.494e+01  1.008e+05   0.000 0.999803    
## V14.El.Salvador                -2.457e+01  4.202e+04  -0.001 0.999534    
## V14.England                    -1.905e+00  1.440e+00  -1.323 0.185849    
## V14.France                     -2.351e+00  1.805e+00  -1.303 0.192718    
## V14.Germany                    -2.326e+00  1.454e+00  -1.600 0.109585    
## V14.Greece                     -1.386e+00  1.768e+00  -0.784 0.433012    
## V14.Guatemala                  -2.640e-02  1.533e+00  -0.017 0.986258    
## V14.Haiti                      -2.542e+00  1.728e+00  -1.471 0.141333    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                   -2.488e+01  1.078e+05   0.000 0.999816    
## V14.Hong                       -2.643e+01  9.444e+04   0.000 0.999777    
## V14.Hungary                    -1.335e+00  1.760e+00  -0.758 0.448256    
## V14.India                      -2.101e+00  1.569e+00  -1.339 0.180475    
## V14.Iran                       -2.531e+01  8.628e+04   0.000 0.999766    
## V14.Ireland                    -1.500e+00  1.699e+00  -0.883 0.377335    
## V14.Italy                      -2.374e+00  1.702e+00  -1.395 0.163066    
## V14.Jamaica                    -2.420e+00  1.731e+00  -1.398 0.162041    
## V14.Japan                      -1.050e+00  1.466e+00  -0.716 0.473999    
## V14.Laos                       -2.382e+00  1.781e+00  -1.337 0.181208    
## V14.Mexico                     -2.643e+00  1.426e+00  -1.853 0.063902 .  
## V14.Nicaragua                  -2.430e+01  7.563e+04   0.000 0.999744    
## V14.Outlying.US.Guam.USVI.etc. -2.591e+01  9.923e+04   0.000 0.999792    
## V14.Peru                       -2.528e+01  9.419e+04   0.000 0.999786    
## V14.Philippines                -2.829e+00  1.443e+00  -1.960 0.050050 .  
## V14.Poland                     -2.200e+00  1.686e+00  -1.305 0.192010    
## V14.Portugal                   -1.713e+00  1.748e+00  -0.980 0.326910    
## V14.Puerto.Rico                -2.029e+00  1.470e+00  -1.380 0.167657    
## V14.Scotland                   -2.593e+01  1.333e+05   0.000 0.999845    
## V14.South                      -2.926e+00  1.570e+00  -1.864 0.062332 .  
## V14.Taiwan                     -1.801e+00  1.558e+00  -1.156 0.247838    
## V14.Thailand                   -2.629e+01  1.105e+05   0.000 0.999810    
## V14.Trinadad.Tobago            -2.562e+01  1.165e+05   0.000 0.999825    
## V14.United.States              -2.316e+00  1.323e+00  -1.751 0.080005 .  
## V14.Vietnam                    -2.168e+00  1.616e+00  -1.341 0.179941    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5218.0  on 14817  degrees of freedom
## Residual deviance: 3811.1  on 14719  degrees of freedom
## AIC: 4009.1
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node4 fit; 50 is vip()'s second argument
# (presumably the number of features to display -- confirm).
# NOTE(review): the title reads "Lr1Fit" / "Assited"; string left unchanged.
vip(
  Adult_TDA_PC_5.40.5_n4_LrFit0,
  50
) +
  ggtitle("Adult_TDA_PCA_5.40.5_n4_Lr1Fit TDA-Assited LR")

# Score the held-out test set with the Node4 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the test labels.
# NOTE(review): recorded specificity is 0.0697, so the model predicts the
# ' <=50K' class for almost every test row.
ad_tda_pc_5.40.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7408  2188
##      >50K       8   164
##                                           
##                Accuracy : 0.7752          
##                  95% CI : (0.7668, 0.7834)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0001051       
##                                           
##                   Kappa : 0.1004          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99892         
##             Specificity : 0.06973         
##          Pos Pred Value : 0.77199         
##          Neg Pred Value : 0.95349         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75839         
##    Detection Prevalence : 0.98239         
##       Balanced Accuracy : 0.53432         
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time (output is identical to the print above).
ad_tda_pc_5.40.5_n4_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7408  2188
##      >50K       8   164
##                                           
##                Accuracy : 0.7752          
##                  95% CI : (0.7668, 0.7834)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0001051       
##                                           
##                   Kappa : 0.1004          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99892         
##             Specificity : 0.06973         
##          Pos Pred Value : 0.77199         
##          Neg Pred Value : 0.95349         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75839         
##    Detection Prevalence : 0.98239         
##       Balanced Accuracy : 0.53432         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the Node4 confusion matrix.
ad_tda_pc_5.40.5_n4_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.7751842752   0.1004311785   0.7667730166   0.7834311474   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   0.0001051039   0.0000000000
# Keep the (named) overall test accuracy -- the first entry printed above.
ad_tda_pc_5.40.5_n4_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_lr_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the Node4 confusion matrix.
ad_tda_pc_5.40.5_n4_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99892125           0.06972789           0.77198833 
##       Neg Pred Value            Precision               Recall 
##           0.95348837           0.77198833           0.99892125 
##                   F1           Prevalence       Detection Rate 
##           0.87091465           0.75921376           0.75839476 
## Detection Prevalence    Balanced Accuracy 
##           0.98239148           0.53432457
# Entries 5 to 7 of `byClass` are Precision, Recall and F1 (see the print above).
ad_tda_pc_5.40.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_lr_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy gap: `ad_lr_fit_re` (presumably the non-TDA LR resamples)
# minus the n4 TDA-assisted LR resamples. Negative = the n4 model scored higher.
diff_tda_pca_5.40.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_pc_5.40.5_n4_lr_fit_re
diff_tda_pca_5.40.5_lr_n4_3_fold
##     Accuracy
## 1 -0.1152123
## 2 -0.1032179
## 3 -0.0594335
## Bayesian Tests 3-fold diff

# Bayesian sign test on the three fold differences; -0.01 and 0.01 are passed
# as the 2nd/3rd arguments (presumably the ROPE bounds -- confirm).
bst_tda_pca_5.40.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Ratio of the sign test's left probability to its right probability.
# probRight is 0 here, so the recorded ratio is Inf.
bst_tda_pca_5.40.5_lr.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_lr.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_lr.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold differences, same bounds.
bsr_tda_pca_5.40.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n4_3_fold
## $winLeft
## [1] 0.9904667
## 
## $winRope
## [1] 0.009533333
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the fold differences.
# NOTE(review): 0.1 is the 2nd argument (presumably the fold correlation rho).
# For 3-fold CV the usual heuristic would be 1/3 -- confirm the intended value.
bct_tda_pca_5.40.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n4_3_fold
## $left
## [1] 0.9740985
## 
## $rope
## [1] 0.008647264
## 
## $right
## [1] 0.01725419
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_lr_n4_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (disabled)
#bf_tda_pca_5.40.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold))
#bf_tda_pca_5.40.5_lr.n4_3_fold

# Classical one-sample t-test of the fold differences against 0 (df = 2).
t.test(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold)
## t = -5.464, df = 2, p-value = 0.0319
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.16555668 -0.01968582
## sample estimates:
##   mean of x 
## -0.09262125
### Test set diff
# Test-set accuracy difference for node 4: lr_cf_ov_acc minus the
# TDA-assisted accuracy. This is a single value, so the tests below are run
# on one observation only.
diff_tda_pca_5.40.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n4_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n4_test
##  Accuracy 
## 0.0781122
## Bayesian Tests Test set diff

# Bayesian Sign Test
bst_tda_pca_5.40.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; 0 here because probLeft is 0.
bst_tda_pca_5.40.5_lr.n4_test_odds.left <-
  bst_tda_pca_5.40.5_lr.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n4_test[["probRight"]]
bst_tda_pca_5.40.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1587
## 
## $winRight
## [1] 0.8413
# Bayesian Correlated Test
# Returns NA for all three regions (see output) -- presumably because a
# variance cannot be estimated from a single difference.
bct_tda_pca_5.40.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n4_test))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n4_test)) #bf_tda_pca_5.40.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n4_test))

##Node5

# Fit the node-5 TDA-assisted logistic regression through caret (method
# "glm", binomial family) on tda.m_adult_5.40.5.n5.vec, resampled as
# configured in fitControl and scored on accuracy.
# The warnings echoed below (non-convergence, fitted probabilities of 0 or 1,
# rank-deficient fit) mean the coefficient estimates should be read with care.
Adult_TDA_PC_5.40.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the caret fit: sample count, resampling scheme, mean accuracy and kappa.
# NOTE(review): accuracy is about 0.98 while kappa is about 0; that pattern
# often points to a heavily imbalanced outcome in this node -- confirm.
print(Adult_TDA_PC_5.40.5_n5_LrFit0)
## Generalized Linear Model 
## 
## 12081 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8054, 8054, 8054 
## Resampling results:
## 
##   Accuracy   Kappa      
##   0.9834451  0.002863384
# Per-fold accuracy and kappa.
Adult_TDA_PC_5.40.5_n5_LrFit0$resample
##    Accuracy         Kappa Resample
## 1 0.9990067 -0.0003726245    Fold1
## 2 0.9990067 -0.0003726245    Fold2
## 3 0.9523218  0.0093354005    Fold3
# Keep only the Accuracy column (column 1) as a one-column data frame; it is
# subtracted from the baseline resamples in the 3-fold comparison below.
ad_tda_pc_5.40.5_n5_lr_fit_re <-
  Adult_TDA_PC_5.40.5_n5_LrFit0$resample["Accuracy"]

# Coefficient table of the fitted model.
summary(Adult_TDA_PC_5.40.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                     2.528e+23  1.346e+16   18776801   <2e-16 ***
## V1                              2.389e+13  6.427e+04  371651628   <2e-16 ***
## V2..                            1.154e+22  2.314e+14   49864713   <2e-16 ***
## V2.Federal.gov                  1.154e+22  2.314e+14   49864730   <2e-16 ***
## V2.Local.gov                    1.154e+22  2.314e+14   49864715   <2e-16 ***
## V2.Never.worked                 1.154e+22  2.314e+14   49864730   <2e-16 ***
## V2.Private                      1.154e+22  2.314e+14   49864724   <2e-16 ***
## V2.Self.emp.inc                 1.154e+22  2.314e+14   49864716   <2e-16 ***
## V2.Self.emp.not.inc             1.154e+22  2.314e+14   49864721   <2e-16 ***
## V2.State.gov                    1.154e+22  2.314e+14   49864730   <2e-16 ***
## V2.Without.pay                  1.154e+22  2.314e+14   49864725   <2e-16 ***
## V3                              7.745e+08  5.323e+00  145516729   <2e-16 ***
## V4.10th                        -1.234e+14  3.167e+06  -38952445   <2e-16 ***
## V4.11th                        -1.478e+15  2.721e+06 -543256694   <2e-16 ***
## V4.12th                         8.749e+14  4.164e+06  210120087   <2e-16 ***
## V4.1st.4th                     -3.973e+14  7.756e+06  -51217457   <2e-16 ***
## V4.5th.6th                      1.890e+15  5.879e+06  321479764   <2e-16 ***
## V4.7th.8th                     -5.671e+14  4.611e+06 -122975228   <2e-16 ***
## V4.9th                          1.104e+15  4.415e+06  250073194   <2e-16 ***
## V4.Assoc.acdm                   1.453e+15  3.633e+06  400118486   <2e-16 ***
## V4.Assoc.voc                   -1.893e+15  3.505e+06 -540294728   <2e-16 ***
## V4.Bachelors                   -2.043e+15  2.594e+06 -787806522   <2e-16 ***
## V4.Doctorate                    5.261e+14  3.879e+07   13560647   <2e-16 ***
## V4.HS.grad                      8.536e+14  1.554e+06  549287185   <2e-16 ***
## V4.Masters                      5.098e+14  6.897e+06   73916108   <2e-16 ***
## V4.Preschool                   -7.942e+14  1.168e+07  -67976968   <2e-16 ***
## V4.Prof.school                 -1.694e+15  2.395e+07  -70706865   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                    -2.643e+23  1.351e+16  -19568906   <2e-16 ***
## V6.Married.AF.spouse           -2.643e+23  1.351e+16  -19568905   <2e-16 ***
## V6.Married.civ.spouse          -2.643e+23  1.351e+16  -19568906   <2e-16 ***
## V6.Married.spouse.absent       -2.643e+23  1.351e+16  -19568906   <2e-16 ***
## V6.Never.married               -2.643e+23  1.351e+16  -19568906   <2e-16 ***
## V6.Separated                   -2.643e+23  1.351e+16  -19568906   <2e-16 ***
## V6.Widowed                     -2.643e+23  1.351e+16  -19568906   <2e-16 ***
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                -3.265e+14  4.647e+06  -70257398   <2e-16 ***
## V7.Armed.Forces                -4.665e+15  4.787e+07  -97450463   <2e-16 ***
## V7.Craft.repair                 2.108e+14  4.902e+06   43005445   <2e-16 ***
## V7.Exec.managerial              1.235e+14  5.240e+06   23565555   <2e-16 ***
## V7.Farming.fishing              2.963e+14  6.693e+06   44271086   <2e-16 ***
## V7.Handlers.cleaners            3.236e+14  4.612e+06   70165336   <2e-16 ***
## V7.Machine.op.inspct           -1.470e+15  4.973e+06 -295567434   <2e-16 ***
## V7.Other.service                4.414e+13  4.590e+06    9617761   <2e-16 ***
## V7.Priv.house.serv             -3.686e+14  7.296e+06  -50516843   <2e-16 ***
## V7.Prof.specialty              -2.620e+14  5.270e+06  -49719442   <2e-16 ***
## V7.Protective.serv              6.283e+13  8.165e+06    7695092   <2e-16 ***
## V7.Sales                        1.131e+15  4.784e+06  236460930   <2e-16 ***
## V7.Tech.support                 1.395e+15  5.826e+06  239435477   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                             NA         NA         NA       NA    
## V8.Not.in.family                3.482e+15  9.682e+06  359616643   <2e-16 ***
## V8.Other.relative               4.885e+15  9.668e+06  505245465   <2e-16 ***
## V8.Own.child                    3.720e+15  9.637e+06  385960479   <2e-16 ***
## V8.Unmarried                    3.304e+15  9.742e+06  339125425   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo          -1.717e+14  5.239e+06  -32771224   <2e-16 ***
## V9.Asian.Pac.Islander          -1.508e+14  5.099e+06  -29577939   <2e-16 ***
## V9.Black                       -3.632e+14  1.702e+06 -213386456   <2e-16 ***
## V9.Other                       -1.156e+13  5.428e+06   -2129090   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                      4.905e+14  1.604e+06  305773612   <2e-16 ***
## V10.Male                               NA         NA         NA       NA    
## V11                            -3.734e+11  7.408e+02 -504091032   <2e-16 ***
## V12                            -9.507e+11  2.509e+03 -378913694   <2e-16 ***
## V13                            -3.690e+10  5.427e+04    -679912   <2e-16 ***
## V14..                           3.980e+12  3.910e+07     101777   <2e-16 ***
## V14.Cambodia                    1.201e+15  4.931e+07   24358909   <2e-16 ***
## V14.Canada                     -4.481e+14  4.053e+07  -11055459   <2e-16 ***
## V14.China                      -1.591e+15  4.220e+07  -37706892   <2e-16 ***
## V14.Columbia                   -7.221e+14  4.039e+07  -17876415   <2e-16 ***
## V14.Cuba                       -8.205e+14  4.034e+07  -20337752   <2e-16 ***
## V14.Dominican.Republic         -3.375e+14  3.999e+07   -8439025   <2e-16 ***
## V14.Ecuador                    -7.987e+14  4.345e+07  -18383118   <2e-16 ***
## V14.El.Salvador                -1.195e+15  3.964e+07  -30145689   <2e-16 ***
## V14.England                    -1.332e+15  4.090e+07  -32579623   <2e-16 ***
## V14.France                     -1.045e+14  4.554e+07   -2295046   <2e-16 ***
## V14.Germany                    -5.695e+14  3.988e+07  -14279993   <2e-16 ***
## V14.Greece                      6.868e+14  5.484e+07   12523143   <2e-16 ***
## V14.Guatemala                  -5.692e+14  4.005e+07  -14214327   <2e-16 ***
## V14.Haiti                      -2.853e+14  4.052e+07   -7040838   <2e-16 ***
## V14.Holand.Netherlands          2.722e+15  7.779e+07   34992795   <2e-16 ***
## V14.Honduras                   -2.491e+15  4.432e+07  -56207905   <2e-16 ***
## V14.Hong                        8.131e+14  4.773e+07   17035327   <2e-16 ***
## V14.Hungary                    -2.933e+15  5.490e+07  -53419238   <2e-16 ***
## V14.India                      -4.417e+14  4.213e+07  -10483773   <2e-16 ***
## V14.Iran                        6.464e+13  4.749e+07    1361005   <2e-16 ***
## V14.Ireland                    -2.555e+14  4.480e+07   -5702740   <2e-16 ***
## V14.Italy                      -5.468e+14  4.213e+07  -12979504   <2e-16 ***
## V14.Jamaica                     7.952e+11  3.990e+07      19932   <2e-16 ***
## V14.Japan                      -5.812e+14  4.215e+07  -13791219   <2e-16 ***
## V14.Laos                        1.176e+15  4.455e+07   26404737   <2e-16 ***
## V14.Mexico                     -1.325e+15  3.902e+07  -33944214   <2e-16 ***
## V14.Nicaragua                  -5.289e+14  4.176e+07  -12664350   <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -8.388e+13  4.425e+07   -1895814   <2e-16 ***
## V14.Peru                        1.525e+14  4.161e+07    3664779   <2e-16 ***
## V14.Philippines                -6.837e+14  3.982e+07  -17171190   <2e-16 ***
## V14.Poland                     -1.525e+15  4.250e+07  -35894641   <2e-16 ***
## V14.Portugal                    6.529e+14  4.279e+07   15257512   <2e-16 ***
## V14.Puerto.Rico                -3.411e+14  3.965e+07   -8602395   <2e-16 ***
## V14.Scotland                    1.643e+15  5.484e+07   29963584   <2e-16 ***
## V14.South                      -1.209e+14  4.095e+07   -2951861   <2e-16 ***
## V14.Taiwan                      2.964e+15  4.262e+07   69540675   <2e-16 ***
## V14.Thailand                   -1.328e+15  4.505e+07  -29470571   <2e-16 ***
## V14.Trinadad.Tobago             3.008e+14  4.426e+07    6797066   <2e-16 ***
## V14.United.States              -3.569e+14  3.881e+07   -9196141   <2e-16 ***
## V14.Vietnam                     5.029e+14  4.053e+07   12408760   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:     72.103  on 12080  degrees of freedom
## Residual deviance: 142804.955  on 11981  degrees of freedom
## AIC: 143005
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 model, showing up to 50 features.
vip(Adult_TDA_PC_5.40.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n5_Lr1Fit TDA-Assited LR")

# Class predictions of the node-5 model for the held-out test data.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of those predictions against the test-set outcome column.
ad_tda_pc_5.40.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.40.5_n5_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6674  2317
##      >50K     742    35
##                                          
##                Accuracy : 0.6868         
##                  95% CI : (0.6775, 0.696)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.1104        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.89995        
##             Specificity : 0.01488        
##          Pos Pred Value : 0.74230        
##          Neg Pred Value : 0.04505        
##              Prevalence : 0.75921        
##          Detection Rate : 0.68325        
##    Detection Prevalence : 0.92045        
##       Balanced Accuracy : 0.45741        
##                                          
##        'Positive' Class :  <=50K         
## 
# Second display of the same confusion matrix; the output matches the one
# printed directly above.
print(ad_tda_pc_5.40.5_n5_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6674  2317
##      >50K     742    35
##                                          
##                Accuracy : 0.6868         
##                  95% CI : (0.6775, 0.696)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.1104        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.89995        
##             Specificity : 0.01488        
##          Pos Pred Value : 0.74230        
##          Neg Pred Value : 0.04505        
##              Prevalence : 0.75921        
##          Detection Rate : 0.68325        
##    Detection Prevalence : 0.92045        
##       Balanced Accuracy : 0.45741        
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the node-5 confusion matrix.
ad_tda_pc_5.40.5_n5_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   6.868346e-01  -1.104181e-01   6.775314e-01   6.960262e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  3.803777e-178
# Accuracy is the first element of $overall (see output above); keep it for
# the test-set comparison.
ad_tda_pc_5.40.5_n5_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_lr_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_pc_5.40.5_n5_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.89994606           0.01488095           0.74229785 
##       Neg Pred Value            Precision               Recall 
##           0.04504505           0.74229785           0.89994606 
##                   F1           Prevalence       Detection Rate 
##           0.81355519           0.75921376           0.68325143 
## Detection Prevalence    Balanced Accuracy 
##           0.92045455           0.45741351
# Precision, Recall and F1 are elements 5 to 7 of byClass.
ad_tda_pc_5.40.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy difference for node 5: ad_lr_fit_re minus the
# TDA-assisted resamples, so negative ("left") values mean the TDA-assisted
# model scored higher on that fold.
diff_tda_pca_5.40.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.40.5_n5_lr_fit_re
diff_tda_pca_5.40.5_lr_n5_3_fold
##     Accuracy
## 1 -0.1520733
## 2 -0.1431236
## 3 -0.1018545
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The two trailing arguments are presumably the ROPE bounds; the rope() call
# further down uses the same -0.01 / 0.01 range.
bst_tda_pca_5.40.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; it is Inf here because probRight is 0.
bst_tda_pca_5.40.5_lr.n5_3_fold_odds.left <-
  bst_tda_pca_5.40.5_lr.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n5_3_fold[["probRight"]]
bst_tda_pca_5.40.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n5_3_fold
## $winLeft
## [1] 0.9903667
## 
## $winRope
## [1] 0.009633333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) looks like the fold correlation rho.
# For k-fold CV it is usually taken as 1/k, which would be 1/3 for the three
# folds used here -- confirm.
bct_tda_pca_5.40.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n5_3_fold
## $left
## [1] 0.9896771
## 
## $rope
## [1] 0.002635091
## 
## $right
## [1] 0.007687766
# Rope Plot
plot(rope(diff_tda_pca_5.40.5_lr_n5_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold))
#bf_tda_pca_5.40.5_lr.n5_3_fold

#t_test
# One-sample t-test of the three fold differences against a mean of 0.
t.test(as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold)
## t = -8.5579, df = 2, p-value = 0.01338
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.19889226 -0.06580869
## sample estimates:
##  mean of x 
## -0.1323505
### Test set diff
# Test-set accuracy difference for node 5: lr_cf_ov_acc minus the
# TDA-assisted accuracy. This is a single value, so the tests below are run
# on one observation only.
diff_tda_pca_5.40.5_lr.n5_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n5_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n5_test
##  Accuracy 
## 0.1664619
## Bayesian Tests Test set diff

# Bayesian Sign Test
bst_tda_pca_5.40.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; 0 here because probLeft is 0.
bst_tda_pca_5.40.5_lr.n5_test_odds.left <-
  bst_tda_pca_5.40.5_lr.n5_test[["probLeft"]] /
  bst_tda_pca_5.40.5_lr.n5_test[["probRight"]]
bst_tda_pca_5.40.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1602
## 
## $winRight
## [1] 0.8398
# Bayesian Correlated Test
# Returns NA for all three regions (see output) -- presumably because a
# variance cannot be estimated from a single difference.
bct_tda_pca_5.40.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n5_test))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n5_test)) #bf_tda_pca_5.40.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n5_test))


##With TDA KDE filter 5 intervals, 40% overlap, 5 bins (matching the 5.40.5 object names)
##Node1


# Fit the node-1 KDE-filter TDA-assisted logistic regression through caret
# (method "glm", binomial family) on tda.m_kde_adult_5.40.5.n1.vec, resampled
# as configured in fitControl and scored on accuracy.
# The warnings echoed below (non-convergence, fitted probabilities of 0 or 1,
# rank-deficient fit) mean the coefficient estimates should be read with care.
Adult_TDA_KDE_5.40.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the caret fit: sample count, resampling scheme, mean accuracy and kappa.
print(Adult_TDA_KDE_5.40.5_n1_LrFit0)
## Generalized Linear Model 
## 
## 11838 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7893, 7891, 7892 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8201559  0.4342154
# Per-fold accuracy and kappa; fold 3 is clearly weaker than folds 1 and 2.
Adult_TDA_KDE_5.40.5_n1_LrFit0$resample
##    Accuracy      Kappa Resample
## 1 0.8590621 0.61882270    Fold1
## 2 0.8530530 0.60139865    Fold2
## 3 0.7483528 0.08242486    Fold3
# Keep only the Accuracy column (column 1) as a one-column data frame; it is
# subtracted from the baseline resamples in the 3-fold comparison below.
ad_tda_kde_5.40.5_n1_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n1_LrFit0$resample["Accuracy"]

# Coefficient table of the fitted model.
summary(Adult_TDA_KDE_5.40.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (10 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     1.113e+13  9.119e+12   1.220 0.222368    
## V1                              1.088e-02  2.619e-03   4.154 3.26e-05 ***
## V2..                           -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Federal.gov                 -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Local.gov                   -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Never.worked                -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Private                     -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Self.emp.inc                -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Self.emp.not.inc            -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.State.gov                   -1.113e+13  9.119e+12  -1.220 0.222368    
## V2.Without.pay                 -1.113e+13  9.119e+12  -1.220 0.222368    
## V3                              5.868e-07  2.255e-07   2.603 0.009251 ** 
## V4.10th                        -1.194e+00  1.755e-01  -6.804 1.01e-11 ***
## V4.11th                        -1.072e+00  1.827e-01  -5.865 4.48e-09 ***
## V4.12th                        -7.273e-01  3.287e-01  -2.212 0.026936 *  
## V4.1st.4th                     -1.808e+00  4.882e-01  -3.702 0.000214 ***
## V4.5th.6th                     -1.547e+00  3.114e-01  -4.968 6.77e-07 ***
## V4.7th.8th                     -1.637e+00  2.000e-01  -8.186 2.70e-16 ***
## V4.9th                         -1.355e+00  2.321e-01  -5.837 5.33e-09 ***
## V4.Assoc.acdm                   3.674e-02  1.795e-01   0.205 0.837846    
## V4.Assoc.voc                    1.062e-01  1.747e-01   0.608 0.543032    
## V4.Bachelors                    5.414e-01  1.086e-01   4.986 6.18e-07 ***
## V4.Doctorate                    1.779e+00  1.825e-01   9.747  < 2e-16 ***
## V4.HS.grad                     -4.552e-01  1.090e-01  -4.177 2.95e-05 ***
## V4.Masters                      9.859e-01  1.325e-01   7.442 9.94e-14 ***
## V4.Preschool                   -3.307e+01  8.064e+04   0.000 0.999673    
## V4.Prof.school                  1.525e+00  1.642e-01   9.288  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -2.655e-01  1.951e-01  -1.361 0.173540    
## V6.Married.AF.spouse            1.235e+00  1.090e+00   1.133 0.257181    
## V6.Married.civ.spouse           1.997e+00  4.655e-01   4.290 1.79e-05 ***
## V6.Married.spouse.absent       -6.959e-01  4.141e-01  -1.681 0.092833 .  
## V6.Never.married               -7.668e-01  2.061e-01  -3.720 0.000199 ***
## V6.Separated                   -3.263e-01  2.905e-01  -1.123 0.261468    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                -2.936e-01  1.777e-01  -1.653 0.098422 .  
## V7.Armed.Forces                -3.381e-01  2.167e+00  -0.156 0.876044    
## V7.Craft.repair                -2.075e-02  1.510e-01  -0.137 0.890703    
## V7.Exec.managerial              6.550e-01  1.523e-01   4.302 1.69e-05 ***
## V7.Farming.fishing             -1.048e+00  2.244e-01  -4.671 3.00e-06 ***
## V7.Handlers.cleaners           -8.334e-01  2.594e-01  -3.212 0.001316 ** 
## V7.Machine.op.inspct           -6.281e-01  2.024e-01  -3.103 0.001918 ** 
## V7.Other.service               -1.054e+00  2.183e-01  -4.831 1.36e-06 ***
## V7.Priv.house.serv             -2.366e+01  2.906e+04  -0.001 0.999351    
## V7.Prof.specialty               3.298e-01  1.594e-01   2.069 0.038578 *  
## V7.Protective.serv              2.757e-02  2.502e-01   0.110 0.912245    
## V7.Sales                        1.269e-01  1.601e-01   0.793 0.427879    
## V7.Tech.support                 4.948e-01  2.192e-01   2.257 0.023983 *  
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.201e+00  1.684e-01  -7.133 9.82e-13 ***
## V8.Not.in.family               -4.274e-01  4.533e-01  -0.943 0.345816    
## V8.Other.relative              -1.276e+00  4.407e-01  -2.896 0.003783 ** 
## V8.Own.child                   -1.707e+00  4.694e-01  -3.638 0.000275 ***
## V8.Unmarried                   -3.763e-01  4.681e-01  -0.804 0.421439    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -1.909e-01  3.557e-01  -0.537 0.591570    
## V9.Asian.Pac.Islander           3.178e-01  2.844e-01   1.117 0.263852    
## V9.Black                       -1.353e-01  1.240e-01  -1.091 0.275281    
## V9.Other                        3.902e-01  4.148e-01   0.941 0.346874    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -9.194e-01  1.213e-01  -7.580 3.46e-14 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.029e-04  1.647e-05  18.384  < 2e-16 ***
## V12                             6.481e-04  5.875e-05  11.031  < 2e-16 ***
## V13                             3.012e-02  2.434e-03  12.376  < 2e-16 ***
## V14..                          -9.887e-01  1.296e+00  -0.763 0.445383    
## V14.Cambodia                    4.047e-01  1.748e+00   0.232 0.816894    
## V14.Canada                     -5.794e-01  1.328e+00  -0.436 0.662715    
## V14.China                      -2.116e+00  1.390e+00  -1.523 0.127830    
## V14.Columbia                   -2.259e+00  1.594e+00  -1.417 0.156421    
## V14.Cuba                       -1.496e+00  1.361e+00  -1.099 0.271608    
## V14.Dominican.Republic         -2.387e+01  4.607e+04  -0.001 0.999587    
## V14.Ecuador                    -2.370e+00  1.996e+00  -1.188 0.234892    
## V14.El.Salvador                -1.635e+00  1.553e+00  -1.052 0.292628    
## V14.England                    -7.178e-01  1.354e+00  -0.530 0.596036    
## V14.France                     -1.059e+00  1.444e+00  -0.733 0.463480    
## V14.Germany                    -3.792e-01  1.350e+00  -0.281 0.778762    
## V14.Greece                     -1.202e+00  1.531e+00  -0.785 0.432213    
## V14.Guatemala                  -1.167e+00  1.623e+00  -0.719 0.472313    
## V14.Haiti                      -1.381e+00  2.137e+00  -0.646 0.518080    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                   -1.261e+00  3.252e+00  -0.388 0.698132    
## V14.Hong                       -7.677e-01  1.554e+00  -0.494 0.621374    
## V14.Hungary                    -3.315e-01  1.702e+00  -0.195 0.845548    
## V14.India                      -1.664e+00  1.373e+00  -1.212 0.225415    
## V14.Iran                       -1.686e+00  1.519e+00  -1.110 0.266917    
## V14.Ireland                     1.462e-01  2.155e+00   0.068 0.945926    
## V14.Italy                       7.389e-01  1.343e+00   0.550 0.582131    
## V14.Jamaica                    -2.141e+00  1.704e+00  -1.257 0.208906    
## V14.Japan                       2.198e-01  1.491e+00   0.147 0.882794    
## V14.Laos                       -4.481e-02  1.653e+00  -0.027 0.978372    
## V14.Mexico                     -1.378e+00  1.301e+00  -1.059 0.289775    
## V14.Nicaragua                  -1.518e+00  1.528e+00  -0.993 0.320589    
## V14.Outlying.US.Guam.USVI.etc. -2.578e+01  1.448e+05   0.000 0.999858    
## V14.Peru                       -1.132e+00  1.709e+00  -0.662 0.507871    
## V14.Philippines                -1.388e+00  1.350e+00  -1.028 0.304148    
## V14.Poland                     -6.021e-01  1.440e+00  -0.418 0.675849    
## V14.Portugal                   -2.076e+00  1.788e+00  -1.161 0.245464    
## V14.Puerto.Rico                -1.049e-01  1.388e+00  -0.076 0.939739    
## V14.Scotland                    3.049e+00  3.769e+00   0.809 0.418522    
## V14.South                      -2.541e+00  1.470e+00  -1.729 0.083838 .  
## V14.Taiwan                     -4.805e-01  1.475e+00  -0.326 0.744673    
## V14.Thailand                   -1.781e+00  1.972e+00  -0.903 0.366382    
## V14.Trinadad.Tobago            -8.897e-01  1.784e+00  -0.499 0.617965    
## V14.United.States              -7.410e-01  1.277e+00  -0.580 0.561645    
## V14.Vietnam                    -1.149e+00  1.545e+00  -0.744 0.456965    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 13553.3  on 11837  degrees of freedom
## Residual deviance:  7290.1  on 11739  degrees of freedom
## AIC: 7488.1
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the KDE node-1 model, showing up to 50 features.
vip(Adult_TDA_KDE_5.40.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n1_Lr1Fit TDA-Assited LR")

# Class predictions of the KDE node-1 model for the held-out test data.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of those predictions against the test-set outcome column.
ad_tda_kde_5.40.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.40.5_n1_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6965  1007
##      >50K     451  1345
##                                           
##                Accuracy : 0.8507          
##                  95% CI : (0.8435, 0.8577)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5559          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9392          
##             Specificity : 0.5719          
##          Pos Pred Value : 0.8737          
##          Neg Pred Value : 0.7489          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7130          
##    Detection Prevalence : 0.8161          
##       Balanced Accuracy : 0.7555          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second display of the same confusion matrix; the output matches the one
# printed directly above.
print(ad_tda_kde_5.40.5_n1_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6965  1007
##      >50K     451  1345
##                                           
##                Accuracy : 0.8507          
##                  95% CI : (0.8435, 0.8577)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5559          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9392          
##             Specificity : 0.5719          
##          Pos Pred Value : 0.8737          
##          Neg Pred Value : 0.7489          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7130          
##    Detection Prevalence : 0.8161          
##       Balanced Accuracy : 0.7555          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the KDE node-1 confusion matrix.
ad_tda_kde_5.40.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.507371e-01   5.559070e-01   8.435147e-01   8.577497e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  2.669432e-110   7.274250e-48
# Accuracy is the first element of $overall (see output above); keep it for
# the test-set comparison.
ad_tda_kde_5.40.5_n1_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_lr_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.40.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9391855            0.5718537            0.8736829 
##       Neg Pred Value            Precision               Recall 
##            0.7488864            0.8736829            0.9391855 
##                   F1           Prevalence       Detection Rate 
##            0.9052508            0.7592138            0.7130426 
## Detection Prevalence    Balanced Accuracy 
##            0.8161343            0.7555196
# Precision, Recall and F1 are elements 5 to 7 of byClass.
ad_tda_kde_5.40.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy difference for KDE node 1: ad_lr_fit_re minus the
# TDA-assisted resamples, so negative ("left") values mean the TDA-assisted
# model scored higher on that fold.
diff_tda_kde_5.40.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n1_lr_fit_re
diff_tda_kde_5.40.5_lr_n1_3_fold
##       Accuracy
## 1 -0.012128700
## 2  0.002830176
## 3  0.102114527
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The two trailing arguments are presumably the ROPE bounds; the rope() call
# further down uses the same -0.01 / 0.01 range.
bst_tda_kde_5.40.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n1_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; 1 here because both probabilities are 0.25.
bst_tda_kde_5.40.5_lr.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n1_3_fold[["probRight"]]
bst_tda_kde_5.40.5_lr.n1_3_fold_odds.left
## [1] 1
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n1_3_fold
## $winLeft
## [1] 0.0739
## 
## $winRope
## [1] 0.4478
## 
## $winRight
## [1] 0.4783
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) looks like the fold correlation rho.
# For k-fold CV it is usually taken as 1/k, which would be 1/3 for the three
# folds used here -- confirm.
bct_tda_kde_5.40.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n1_3_fold
## $left
## [1] 0.2134573
## 
## $rope
## [1] 0.1181525
## 
## $right
## [1] 0.6683902
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_lr_n1_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold))
#bf_tda_kde_5.40.5_lr.n1_3_fold

#t_test
# One-sample t-test of the three fold differences against a mean of 0.
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold)
## t = 0.86303, df = 2, p-value = 0.4791
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1233070  0.1851843
## sample estimates:
##  mean of x 
## 0.03093867
### Test set diff
# Single accuracy difference: lr_cf_ov_acc minus the Node1 TDA-assisted LR
# accuracy.
# NOTE(review): only one difference is tested here, so the tests below are
# degenerate -- the recorded output shows NaN odds (0 / 0) and NA results from
# the correlated test.
diff_tda_kde_5.40.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n1_test
##    Accuracy 
## 0.002559378
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability

bst_tda_kde_5.40.5_lr.n1_test_odds.left <-
  bst_tda_kde_5.40.5_lr.n1_test[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n1_test[["probRight"]]
bst_tda_kde_5.40.5_lr.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.40.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n1_test)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n1_test)) #bf_tda_pca_5.40.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n1_test))


##With TDA KDE filter 5 intervals, 40% overlap, 5 bins 
##Node2

# Logistic regression (glm, binomial family) fitted through train() on the
# Node2 data, resampled according to fitControl and tuned on accuracy.
# NOTE(review): the recorded warnings and summary report non-convergence and
# coefficients dropped for singularities, so the predictors look collinear.
Adult_TDA_KDE_5.40.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Resampling summary of the Node2 fit
Adult_TDA_KDE_5.40.5_n2_LrFit0
## Generalized Linear Model 
## 
## 11203 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7469, 7468, 7469 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.842722  0.5987201
# Per-fold accuracy and kappa
Adult_TDA_KDE_5.40.5_n2_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8478843 0.6098806    Fold1
## 2 0.8278447 0.5648368    Fold2
## 3 0.8524371 0.6214427    Fold3
# Keep the per-fold accuracies as a one-column data frame for the fold-wise
# comparison against the baseline.
ad_tda_kde_5.40.5_n2_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n2_LrFit0$resample["Accuracy"]

# Coefficient table of the fitted model
summary(Adult_TDA_KDE_5.40.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (15 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     1.108e+12  3.045e+12   0.364 0.715928    
## V1                              4.027e-02  3.200e-03  12.586  < 2e-16 ***
## V2..                           -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Federal.gov                 -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Local.gov                   -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Never.worked                -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Private                     -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Self.emp.inc                -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Self.emp.not.inc            -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.State.gov                   -1.108e+12  3.045e+12  -0.364 0.715928    
## V2.Without.pay                 -1.108e+12  3.045e+12  -0.364 0.715928    
## V3                              1.125e-06  3.162e-07   3.558 0.000374 ***
## V4.10th                        -2.353e+01  3.391e+04  -0.001 0.999446    
## V4.11th                        -7.339e-01  2.154e-01  -3.407 0.000657 ***
## V4.12th                        -8.115e-01  3.520e-01  -2.306 0.021122 *  
## V4.1st.4th                             NA         NA      NA       NA    
## V4.5th.6th                             NA         NA      NA       NA    
## V4.7th.8th                             NA         NA      NA       NA    
## V4.9th                                 NA         NA      NA       NA    
## V4.Assoc.acdm                   3.451e-01  1.499e-01   2.301 0.021367 *  
## V4.Assoc.voc                    1.242e-01  1.531e-01   0.811 0.417150    
## V4.Bachelors                    9.138e-01  9.759e-02   9.364  < 2e-16 ***
## V4.Doctorate                           NA         NA      NA       NA    
## V4.HS.grad                     -4.593e-01  9.413e-02  -4.879 1.06e-06 ***
## V4.Masters                      1.376e+00  1.190e-01  11.562  < 2e-16 ***
## V4.Preschool                           NA         NA      NA       NA    
## V4.Prof.school                  2.500e+00  2.257e-01  11.078  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -1.558e-01  2.409e-01  -0.647 0.517796    
## V6.Married.AF.spouse            3.907e+00  1.026e+00   3.806 0.000141 ***
## V6.Married.civ.spouse           2.198e+00  5.607e-01   3.921 8.82e-05 ***
## V6.Married.spouse.absent        1.211e-01  3.877e-01   0.312 0.754776    
## V6.Never.married               -6.083e-01  2.532e-01  -2.402 0.016285 *  
## V6.Separated                   -3.332e-01  3.308e-01  -1.007 0.313815    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 6.236e-02  1.744e-01   0.358 0.720698    
## V7.Armed.Forces                -2.400e+01  3.307e+05   0.000 0.999942    
## V7.Craft.repair                 1.423e-01  1.550e-01   0.918 0.358509    
## V7.Exec.managerial              9.258e-01  1.546e-01   5.987 2.13e-09 ***
## V7.Farming.fishing             -6.734e-01  2.304e-01  -2.923 0.003466 ** 
## V7.Handlers.cleaners           -4.640e-01  2.816e-01  -1.648 0.099437 .  
## V7.Machine.op.inspct           -5.604e-01  2.131e-01  -2.630 0.008535 ** 
## V7.Other.service               -1.012e+00  2.282e-01  -4.434 9.27e-06 ***
## V7.Priv.house.serv             -3.434e+00  2.578e+00  -1.332 0.182856    
## V7.Prof.specialty               4.856e-01  1.627e-01   2.984 0.002845 ** 
## V7.Protective.serv              6.623e-01  2.319e-01   2.855 0.004300 ** 
## V7.Sales                        2.612e-01  1.609e-01   1.623 0.104617    
## V7.Tech.support                 5.647e-01  2.079e-01   2.716 0.006607 ** 
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.451e+00  1.585e-01  -9.153  < 2e-16 ***
## V8.Not.in.family               -7.229e-01  5.263e-01  -1.373 0.169607    
## V8.Other.relative              -1.611e+00  4.430e-01  -3.637 0.000276 ***
## V8.Own.child                   -1.620e+00  5.343e-01  -3.032 0.002427 ** 
## V8.Unmarried                   -7.305e-01  5.377e-01  -1.358 0.174315    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           2.862e-02  3.225e-01   0.089 0.929271    
## V9.Asian.Pac.Islander          -9.959e-02  2.457e-01  -0.405 0.685292    
## V9.Black                       -5.255e-02  1.305e-01  -0.403 0.687217    
## V9.Other                        1.851e-01  4.201e-01   0.441 0.659551    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -9.044e-01  1.192e-01  -7.590 3.21e-14 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.290e-04  1.718e-05  19.155  < 2e-16 ***
## V12                             8.095e-04  6.614e-05  12.240  < 2e-16 ***
## V13                             2.604e-02  2.853e-03   9.128  < 2e-16 ***
## V14..                          -5.844e-01  9.204e-01  -0.635 0.525453    
## V14.Cambodia                    1.853e+00  1.345e+00   1.377 0.168381    
## V14.Canada                      7.088e-02  9.975e-01   0.071 0.943353    
## V14.China                      -3.302e-01  1.091e+00  -0.303 0.762038    
## V14.Columbia                   -2.514e+01  7.035e+04   0.000 0.999715    
## V14.Cuba                        1.833e-01  1.046e+00   0.175 0.860916    
## V14.Dominican.Republic         -2.412e+01  7.824e+04   0.000 0.999754    
## V14.Ecuador                    -1.048e+00  1.648e+00  -0.636 0.524769    
## V14.El.Salvador                -1.346e-01  1.188e+00  -0.113 0.909734    
## V14.England                     6.579e-01  9.931e-01   0.662 0.507712    
## V14.France                      6.376e-01  1.315e+00   0.485 0.627643    
## V14.Germany                     5.010e-01  9.871e-01   0.508 0.611736    
## V14.Greece                     -1.217e+00  1.223e+00  -0.995 0.319698    
## V14.Guatemala                  -6.987e-01  2.228e+00  -0.314 0.753790    
## V14.Haiti                      -7.312e-01  1.381e+00  -0.530 0.596353    
## V14.Holand.Netherlands         -2.210e+01  3.347e+05   0.000 0.999947    
## V14.Honduras                   -2.281e+01  1.486e+05   0.000 0.999878    
## V14.Hong                        1.758e+00  1.381e+00   1.273 0.203080    
## V14.Hungary                     1.821e+00  1.986e+00   0.917 0.359233    
## V14.India                      -3.107e-01  9.846e-01  -0.316 0.752326    
## V14.Iran                       -4.481e-02  1.248e+00  -0.036 0.971345    
## V14.Ireland                    -2.395e+01  1.527e+05   0.000 0.999875    
## V14.Italy                       1.075e+00  1.078e+00   0.997 0.318933    
## V14.Jamaica                    -1.634e+00  1.383e+00  -1.182 0.237282    
## V14.Japan                       9.220e-01  1.119e+00   0.824 0.409816    
## V14.Laos                       -2.462e+01  1.120e+05   0.000 0.999825    
## V14.Mexico                     -4.619e-01  9.951e-01  -0.464 0.642542    
## V14.Nicaragua                  -2.398e+01  8.214e+04   0.000 0.999767    
## V14.Outlying.US.Guam.USVI.etc. -2.416e+01  1.881e+05   0.000 0.999898    
## V14.Peru                       -1.533e-01  1.459e+00  -0.105 0.916334    
## V14.Philippines                 6.989e-01  9.668e-01   0.723 0.469721    
## V14.Poland                      4.892e-01  1.098e+00   0.445 0.656029    
## V14.Portugal                   -2.413e+01  1.412e+05   0.000 0.999864    
## V14.Puerto.Rico                -2.242e-01  1.203e+00  -0.186 0.852156    
## V14.Scotland                    5.750e-02  1.410e+00   0.041 0.967467    
## V14.South                      -1.041e+00  1.098e+00  -0.948 0.343192    
## V14.Taiwan                      3.489e-01  1.100e+00   0.317 0.751137    
## V14.Thailand                   -8.134e-01  1.512e+00  -0.538 0.590554    
## V14.Trinadad.Tobago            -2.293e-01  1.788e+00  -0.128 0.897928    
## V14.United.States               1.898e-01  8.892e-01   0.213 0.830939    
## V14.Vietnam                    -1.885e+00  1.463e+00  -1.288 0.197606    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 13299.2  on 11202  degrees of freedom
## Residual deviance:  7327.8  on 11109  degrees of freedom
## AIC: 7515.8
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the 50 top-ranked features for the Node2 fit
# (title typo "Assited" corrected).
vip(Adult_TDA_KDE_5.40.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n2_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the Node2 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6895   994
##      >50K     521  1358
##                                          
##                Accuracy : 0.8449         
##                  95% CI : (0.8376, 0.852)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5445         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9297         
##             Specificity : 0.5774         
##          Pos Pred Value : 0.8740         
##          Neg Pred Value : 0.7227         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7059         
##    Detection Prevalence : 0.8076         
##       Balanced Accuracy : 0.7536         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second print of the same Node2 confusion matrix
ad_tda_kde_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6895   994
##      >50K     521  1358
##                                          
##                Accuracy : 0.8449         
##                  95% CI : (0.8376, 0.852)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5445         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9297         
##             Specificity : 0.5774         
##          Pos Pred Value : 0.8740         
##          Neg Pred Value : 0.7227         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7059         
##    Detection Prevalence : 0.8076         
##       Balanced Accuracy : 0.7536         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the Node2 confusion matrix
ad_tda_kde_5.40.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.449017e-01   5.445151e-01   8.375691e-01   8.520281e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   3.448526e-96   7.644200e-34
# Keep the test-set accuracy, selected by name rather than by position.
ad_tda_kde_5.40.5_n2_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics, then keep precision, recall and F1 by name.
ad_tda_kde_5.40.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9297465            0.5773810            0.8740018 
##       Neg Pred Value            Precision               Recall 
##            0.7227249            0.8740018            0.9297465 
##                   F1           Prevalence       Detection Rate 
##            0.9010127            0.7592138            0.7058763 
## Detection Prevalence    Balanced Accuracy 
##            0.8076372            0.7535637
ad_tda_kde_5.40.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-by-fold accuracy gap between ad_lr_fit_re (the non-TDA LR resamples,
# going by its name) and the Node2 TDA-assisted LR resamples.
diff_tda_kde_5.40.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n2_lr_fit_re
diff_tda_kde_5.40.5_lr_n2_3_fold
##        Accuracy
## 1 -0.0009509028
## 2  0.0280384150
## 3 -0.0019697751
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The trailing -0.01 / 0.01 arguments presumably bound the ROPE; the same
# range is passed to the rope plot further down.

bst_tda_kde_5.40.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability

bst_tda_kde_5.40.5_lr.n2_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n2_3_fold[["probRight"]]
bst_tda_kde_5.40.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.5722333
## 
## $winRight
## [1] 0.4277667
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation between
# folds; for 3-fold CV the customary value would be 1/3 -- confirm against the
# definition of correlatedBayesianTtest.

bct_tda_kde_5.40.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n2_3_fold
## $left
## [1] 0.1235925
## 
## $rope
## [1] 0.426803
## 
## $right
## [1] 0.4496045
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_lr_n2_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold))
#bf_tda_kde_5.40.5_lr.n2_3_fold

# Frequentist one-sample t-test on the same fold differences
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold)
## t = 0.8511, df = 2, p-value = 0.4844
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03395398  0.05069913
## sample estimates:
##   mean of x 
## 0.008372579
### Test set diff
# Single test-set accuracy difference: lr_cf_ov_acc minus the Node2
# TDA-assisted LR accuracy.
# NOTE(review): only one difference is tested here, so the tests below are
# degenerate -- the recorded output shows NaN odds (0 / 0) and NA results from
# the correlated test.
diff_tda_kde_5.40.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n2_test
##    Accuracy 
## 0.008394758
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability

bst_tda_kde_5.40.5_lr.n2_test_odds.left <-
  bst_tda_kde_5.40.5_lr.n2_test[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n2_test[["probRight"]]
bst_tda_kde_5.40.5_lr.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.40.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n2_test)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n2_test)) #bf_tda_pca_5.40.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n2_test))

##Node3

# Logistic regression (glm, binomial family) fitted through train() on the
# Node3 data, resampled according to fitControl and tuned on accuracy.
# NOTE(review): the recorded warnings and summary report non-convergence and
# coefficients dropped for singularities, so the predictors look collinear.
Adult_TDA_KDE_5.40.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Resampling summary of the Node3 fit
Adult_TDA_KDE_5.40.5_n3_LrFit0
## Generalized Linear Model 
## 
## 10351 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6901, 6900, 6901 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8308375  0.5760205
# Per-fold accuracy and kappa
Adult_TDA_KDE_5.40.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8414493 0.6017940    Fold1
## 2 0.8316430 0.5722788    Fold2
## 3 0.8194203 0.5539889    Fold3
# Keep the Node3 per-fold accuracies for the fold-wise comparison.
# Fix: this previously read the resamples of the Node2 model
# (Adult_TDA_KDE_5.40.5_n2_LrFit0), so every Node3 fold-wise comparison
# silently reused the Node2 numbers.
ad_tda_kde_5.40.5_n3_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n3_LrFit0$resample["Accuracy"]

# Coefficient table of the fitted model
summary(Adult_TDA_KDE_5.40.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (19 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -5.442e+12  1.082e+13   -0.503 0.614894    
## V1                              6.072e-02  3.986e-03   15.234  < 2e-16 ***
## V2..                            5.442e+12  1.082e+13    0.503 0.614894    
## V2.Federal.gov                  5.442e+12  1.082e+13    0.503 0.614894    
## V2.Local.gov                    5.442e+12  1.082e+13    0.503 0.614894    
## V2.Never.worked                -4.498e+15  1.082e+13 -415.815  < 2e-16 ***
## V2.Private                      5.442e+12  1.082e+13    0.503 0.614894    
## V2.Self.emp.inc                 5.442e+12  1.082e+13    0.503 0.614894    
## V2.Self.emp.not.inc             5.442e+12  1.082e+13    0.503 0.614894    
## V2.State.gov                    5.442e+12  1.082e+13    0.503 0.614894    
## V2.Without.pay                  5.442e+12  1.082e+13    0.503 0.614894    
## V3                              1.137e-06  4.582e-07    2.482 0.013054 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                                NA         NA       NA       NA    
## V4.12th                        -9.076e-02  3.513e-01   -0.258 0.796118    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   5.015e-01  1.444e-01    3.472 0.000517 ***
## V4.Assoc.voc                    3.809e-01  1.435e-01    2.654 0.007949 ** 
## V4.Bachelors                    1.218e+00  9.825e-02   12.396  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -1.749e-01  9.321e-02   -1.877 0.060585 .  
## V4.Masters                      1.568e+00  1.493e-01   10.503  < 2e-16 ***
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -5.096e-02  3.358e-01   -0.152 0.879399    
## V6.Married.AF.spouse            3.786e+00  1.228e+00    3.083 0.002052 ** 
## V6.Married.civ.spouse           2.273e+00  5.456e-01    4.166 3.10e-05 ***
## V6.Married.spouse.absent        6.208e-02  4.724e-01    0.131 0.895439    
## V6.Never.married               -2.785e-01  3.464e-01   -0.804 0.421473    
## V6.Separated                   -1.224e-01  4.115e-01   -0.297 0.766196    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.099e-01  1.716e-01    1.223 0.221165    
## V7.Armed.Forces                -2.358e+01  2.328e+05    0.000 0.999919    
## V7.Craft.repair                 1.714e-01  1.522e-01    1.126 0.260112    
## V7.Exec.managerial              1.029e+00  1.540e-01    6.683 2.34e-11 ***
## V7.Farming.fishing             -8.919e-01  2.579e-01   -3.458 0.000545 ***
## V7.Handlers.cleaners           -4.710e-01  2.636e-01   -1.787 0.073959 .  
## V7.Machine.op.inspct           -1.628e-01  1.876e-01   -0.868 0.385445    
## V7.Other.service               -6.540e-01  2.227e-01   -2.936 0.003320 ** 
## V7.Priv.house.serv             -2.325e+01  5.408e+04    0.000 0.999657    
## V7.Prof.specialty               6.886e-01  1.615e-01    4.263 2.02e-05 ***
## V7.Protective.serv              1.027e+00  2.257e-01    4.550 5.36e-06 ***
## V7.Sales                        6.394e-01  1.581e-01    4.045 5.24e-05 ***
## V7.Tech.support                 6.904e-01  1.990e-01    3.469 0.000522 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.494e+00  1.705e-01   -8.764  < 2e-16 ***
## V8.Not.in.family               -1.007e+00  4.577e-01   -2.199 0.027883 *  
## V8.Other.relative              -1.873e+00  4.227e-01   -4.431 9.37e-06 ***
## V8.Own.child                   -2.335e+00  4.404e-01   -5.301 1.15e-07 ***
## V8.Unmarried                   -1.133e+00  4.719e-01   -2.401 0.016346 *  
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -6.787e-01  4.331e-01   -1.567 0.117108    
## V9.Asian.Pac.Islander           1.375e-01  2.310e-01    0.595 0.551541    
## V9.Black                       -1.395e-01  1.281e-01   -1.089 0.276125    
## V9.Other                       -6.095e-01  4.897e-01   -1.245 0.213209    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -7.951e-01  1.350e-01   -5.890 3.87e-09 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.207e-04  1.753e-05   18.297  < 2e-16 ***
## V12                             6.917e-04  6.675e-05   10.363  < 2e-16 ***
## V13                             2.357e-02  3.016e-03    7.817 5.42e-15 ***
## V14..                           2.307e-02  1.223e+00    0.019 0.984951    
## V14.Cambodia                    2.013e+00  1.490e+00    1.350 0.176891    
## V14.Canada                      1.081e+00  1.297e+00    0.834 0.404368    
## V14.China                      -6.130e-01  1.384e+00   -0.443 0.657914    
## V14.Columbia                   -2.369e+01  5.866e+04    0.000 0.999678    
## V14.Cuba                        1.418e+00  1.368e+00    1.036 0.300035    
## V14.Dominican.Republic         -2.499e-01  1.654e+00   -0.151 0.879931    
## V14.Ecuador                    -6.154e-01  1.754e+00   -0.351 0.725773    
## V14.El.Salvador                 4.170e-01  1.487e+00    0.280 0.779117    
## V14.England                     8.603e-01  1.312e+00    0.656 0.511967    
## V14.France                      1.691e+00  1.519e+00    1.113 0.265873    
## V14.Germany                     9.204e-01  1.261e+00    0.730 0.465447    
## V14.Greece                     -7.581e-01  1.493e+00   -0.508 0.611498    
## V14.Guatemala                  -2.237e+01  1.215e+05    0.000 0.999853    
## V14.Haiti                       6.880e-01  1.459e+00    0.471 0.637356    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -2.327e+01  2.169e+05    0.000 0.999914    
## V14.Hong                       -2.156e+01  2.294e+05    0.000 0.999925    
## V14.Hungary                    -2.462e+01  1.243e+05    0.000 0.999842    
## V14.India                       1.691e-01  1.293e+00    0.131 0.895951    
## V14.Iran                        5.009e-01  1.328e+00    0.377 0.706073    
## V14.Ireland                     1.907e+00  1.544e+00    1.235 0.216681    
## V14.Italy                       4.168e-01  1.332e+00    0.313 0.754403    
## V14.Jamaica                     5.246e-01  1.418e+00    0.370 0.711331    
## V14.Japan                       8.073e-01  1.367e+00    0.590 0.554959    
## V14.Laos                       -2.363e+01  1.420e+05    0.000 0.999867    
## V14.Mexico                     -2.971e-01  1.351e+00   -0.220 0.825969    
## V14.Nicaragua                  -2.110e+01  1.543e+05    0.000 0.999891    
## V14.Outlying.US.Guam.USVI.etc. -2.248e+01  1.749e+05    0.000 0.999897    
## V14.Peru                       -6.751e-01  1.795e+00   -0.376 0.706891    
## V14.Philippines                 1.609e+00  1.268e+00    1.269 0.204332    
## V14.Poland                      4.808e-01  1.345e+00    0.358 0.720679    
## V14.Portugal                    9.624e-01  1.529e+00    0.629 0.529083    
## V14.Puerto.Rico                -6.887e-01  1.380e+00   -0.499 0.617852    
## V14.Scotland                   -5.474e-03  1.801e+00   -0.003 0.997575    
## V14.South                      -5.974e-01  1.368e+00   -0.437 0.662327    
## V14.Taiwan                     -2.731e-02  1.412e+00   -0.019 0.984565    
## V14.Thailand                   -4.045e-01  1.792e+00   -0.226 0.821425    
## V14.Trinadad.Tobago             9.461e-02  1.933e+00    0.049 0.960974    
## V14.United.States               6.505e-01  1.204e+00    0.540 0.589131    
## V14.Vietnam                    -2.113e+00  1.651e+00   -1.280 0.200546    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12380.9  on 10350  degrees of freedom
## Residual deviance:  7087.5  on 10261  degrees of freedom
## AIC: 7267.5
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the 50 top-ranked features for the Node3 fit
# (title typo "Assited" corrected).
vip(Adult_TDA_KDE_5.40.5_n3_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n3_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the Node3 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6733   930
##      >50K     683  1422
##                                           
##                Accuracy : 0.8349          
##                  95% CI : (0.8274, 0.8422)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5316          
##                                           
##  Mcnemar's Test P-Value : 9.059e-10       
##                                           
##             Sensitivity : 0.9079          
##             Specificity : 0.6046          
##          Pos Pred Value : 0.8786          
##          Neg Pred Value : 0.6755          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6893          
##    Detection Prevalence : 0.7845          
##       Balanced Accuracy : 0.7562          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same Node3 confusion matrix
ad_tda_kde_5.40.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6733   930
##      >50K     683  1422
##                                           
##                Accuracy : 0.8349          
##                  95% CI : (0.8274, 0.8422)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5316          
##                                           
##  Mcnemar's Test P-Value : 9.059e-10       
##                                           
##             Sensitivity : 0.9079          
##             Specificity : 0.6046          
##          Pos Pred Value : 0.8786          
##          Neg Pred Value : 0.6755          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6893          
##    Detection Prevalence : 0.7845          
##       Balanced Accuracy : 0.7562          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the Node3 confusion matrix
ad_tda_kde_5.40.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.348690e-01   5.315527e-01   8.273549e-01   8.421827e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.759957e-74   9.058863e-10
# Keep the test-set accuracy, selected by name rather than by position.
ad_tda_kde_5.40.5_n3_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics, then keep precision, recall and F1 by name.
ad_tda_kde_5.40.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9079018            0.6045918            0.8786376 
##       Neg Pred Value            Precision               Recall 
##            0.6755344            0.8786376            0.9079018 
##                   F1           Prevalence       Detection Rate 
##            0.8930300            0.7592138            0.6892916 
## Detection Prevalence    Balanced Accuracy 
##            0.7845004            0.7562468
ad_tda_kde_5.40.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]


###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Fold-by-fold accuracy gap between ad_lr_fit_re (the non-TDA LR resamples,
# going by its name) and the Node3 TDA-assisted LR resamples.
# NOTE(review): the recorded differences, sign-test, correlated-test and t-test
# output in this section are identical to the Node2 section, although the
# Node3 fold accuracies printed earlier (0.8414, 0.8316, 0.8194) differ from
# Node2's. The recorded output should be regenerated.
diff_tda_kde_5.40.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n3_lr_fit_re
diff_tda_kde_5.40.5_lr_n3_3_fold
##        Accuracy
## 1 -0.0009509028
## 2  0.0280384150
## 3 -0.0019697751
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The trailing -0.01 / 0.01 arguments presumably bound the ROPE; the same
# range is passed to the rope plot further down.

bst_tda_kde_5.40.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability

bst_tda_kde_5.40.5_lr.n3_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n3_3_fold[["probRight"]]
bst_tda_kde_5.40.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.5806333
## 
## $winRight
## [1] 0.4193667
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation between
# folds; for 3-fold CV the customary value would be 1/3 -- confirm against the
# definition of correlatedBayesianTtest.

bct_tda_kde_5.40.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n3_3_fold
## $left
## [1] 0.1235925
## 
## $rope
## [1] 0.426803
## 
## $right
## [1] 0.4496045
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_lr_n3_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold))
#bf_tda_kde_5.40.5_lr.n3_3_fold

# Frequentist one-sample t-test on the same fold differences
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold)
## t = 0.8511, df = 2, p-value = 0.4844
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03395398  0.05069913
## sample estimates:
##   mean of x 
## 0.008372579
### Test set diff
# Single accuracy difference: `lr_cf_ov_acc` minus the Node 3 model's accuracy.
diff_tda_kde_5.40.5_lr.n3_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n3_test
##   Accuracy 
## 0.01842752
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the single Node 3 test-set difference with bounds -0.01 / 0.01.
bst_tda_kde_5.40.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight for the test-set sign test (Inf if probRight is 0).
bst_tda_kde_5.40.5_lr.n3_test_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Runs on the Node 3 test-set difference and stores the result under the
# Node 3 name. This step previously reused the Node 2 objects (copy-paste
# slip), which recomputed and overwrote the Node 2 result and left Node 3
# untested.
# NOTE(review): the recorded output printed below this call was produced from
# the Node 2 difference and should be regenerated.
bsr_tda_kde_5.40.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the recorded result is NA for left/rope/right. The input is a
# single accuracy difference, presumably too few values for this test; confirm.
bct_tda_kde_5.40.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n3_test)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n3_test)) #bf_tda_pca_5.40.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n3_test))

##Node4

# Node 4: logistic regression (caret method "glm", binomial family) fitted on
# `tda.m_kde_adult_5.40.5.n4.vec`, resampled according to `fitControl`.
# The recorded run emitted glm non-convergence and rank-deficiency warnings.
Adult_TDA_KDE_5.40.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.40.5_n4_LrFit0
## Generalized Linear Model 
## 
## 8741 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5828, 5827, 5827 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8612281  0.5431938
Adult_TDA_KDE_5.40.5_n4_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8558187 0.5280364    Fold1
## 2 0.8651338 0.5527739    Fold2
## 3 0.8627316 0.5487711    Fold3
# Per-fold accuracies of the Node 4 fit, kept as a one-column data frame
# (column picked by name), followed by the model's coefficient summary.
ad_tda_kde_5.40.5_n4_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_LrFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.40.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (19 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                     2.134e+13  3.330e+13  6.410e-01 0.521537    
## V1                              7.427e-02  5.668e-03  1.310e+01  < 2e-16 ***
## V2..                           -1.687e+13  2.973e+13 -5.670e-01 0.570403    
## V2.Federal.gov                 -1.687e+13  2.972e+13 -5.680e-01 0.570294    
## V2.Local.gov                   -1.687e+13  2.973e+13 -5.680e-01 0.570339    
## V2.Never.worked                -4.520e+15  2.974e+13 -1.520e+02  < 2e-16 ***
## V2.Private                     -1.687e+13  2.972e+13 -5.680e-01 0.570204    
## V2.Self.emp.inc                -1.687e+13  2.973e+13 -5.680e-01 0.570324    
## V2.Self.emp.not.inc            -1.687e+13  2.972e+13 -5.680e-01 0.570201    
## V2.State.gov                   -1.687e+13  2.972e+13 -5.680e-01 0.570244    
## V2.Without.pay                 -4.520e+15  2.972e+13 -1.521e+02  < 2e-16 ***
## V3                              1.573e-06  6.993e-07  2.250e+00 0.024469 *  
## V4.10th                                NA         NA         NA       NA    
## V4.11th                                NA         NA         NA       NA    
## V4.12th                        -3.042e-01  4.582e-01 -6.640e-01 0.506712    
## V4.1st.4th                             NA         NA         NA       NA    
## V4.5th.6th                             NA         NA         NA       NA    
## V4.7th.8th                             NA         NA         NA       NA    
## V4.9th                                 NA         NA         NA       NA    
## V4.Assoc.acdm                   7.289e-01  2.256e-01  3.231e+00 0.001234 ** 
## V4.Assoc.voc                    4.727e-01  1.401e-01  3.375e+00 0.000739 ***
## V4.Bachelors                    1.143e+00  1.251e-01  9.141e+00  < 2e-16 ***
## V4.Doctorate                           NA         NA         NA       NA    
## V4.HS.grad                     -1.622e-01  9.156e-02 -1.771e+00 0.076507 .  
## V4.Masters                             NA         NA         NA       NA    
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                         NA         NA         NA       NA    
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                    -4.254e-01  5.627e-01 -7.560e-01 0.449703    
## V6.Married.AF.spouse            2.095e+00  1.162e+00  1.803e+00 0.071403 .  
## V6.Married.civ.spouse           2.102e+00  7.620e-01  2.758e+00 0.005810 ** 
## V6.Married.spouse.absent        1.707e-01  7.473e-01  2.280e-01 0.819276    
## V6.Never.married               -5.590e-01  5.737e-01 -9.740e-01 0.329899    
## V6.Separated                   -5.369e-01  6.451e-01 -8.320e-01 0.405223    
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                 2.327e-01  1.894e-01  1.229e+00 0.219108    
## V7.Armed.Forces                -4.504e+15  3.001e+07 -1.501e+08  < 2e-16 ***
## V7.Craft.repair                 2.033e-01  1.621e-01  1.254e+00 0.209691    
## V7.Exec.managerial              9.821e-01  1.715e-01  5.725e+00 1.03e-08 ***
## V7.Farming.fishing             -7.879e-01  3.005e-01 -2.622e+00 0.008747 ** 
## V7.Handlers.cleaners           -6.266e-01  2.813e-01 -2.228e+00 0.025907 *  
## V7.Machine.op.inspct           -2.835e-02  1.952e-01 -1.450e-01 0.884499    
## V7.Other.service               -4.776e-01  2.329e-01 -2.051e+00 0.040294 *  
## V7.Priv.house.serv             -1.469e+01  5.911e+02 -2.500e-02 0.980176    
## V7.Prof.specialty               8.882e-01  1.945e-01  4.566e+00 4.98e-06 ***
## V7.Protective.serv              1.042e+00  2.587e-01  4.027e+00 5.64e-05 ***
## V7.Sales                        5.888e-01  1.749e-01  3.366e+00 0.000762 ***
## V7.Tech.support                 8.556e-01  2.249e-01  3.805e+00 0.000142 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                     -1.572e+00  2.361e-01 -6.658e+00 2.78e-11 ***
## V8.Not.in.family               -1.003e+00  5.625e-01 -1.784e+00 0.074417 .  
## V8.Other.relative              -2.437e+00  5.794e-01 -4.206e+00 2.60e-05 ***
## V8.Own.child                   -2.175e+00  5.216e-01 -4.170e+00 3.05e-05 ***
## V8.Unmarried                   -1.376e+00  5.854e-01 -2.351e+00 0.018729 *  
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo          -1.179e+00  5.163e-01 -2.284e+00 0.022401 *  
## V9.Asian.Pac.Islander           4.072e-01  3.108e-01  1.310e+00 0.190105    
## V9.Black                       -3.458e-01  1.624e-01 -2.129e+00 0.033244 *  
## V9.Other                       -1.315e+00  6.762e-01 -1.945e+00 0.051799 .  
## V9.White                               NA         NA         NA       NA    
## V10.Female                     -8.700e-01  2.084e-01 -4.174e+00 2.99e-05 ***
## V10.Male                               NA         NA         NA       NA    
## V11                             3.328e-04  2.235e-05  1.489e+01  < 2e-16 ***
## V12                             5.415e-04  7.757e-05  6.981e+00 2.94e-12 ***
## V13                             2.948e-02  3.695e-03  7.978e+00 1.49e-15 ***
## V14..                          -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Cambodia                   -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Canada                     -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.China                      -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Columbia                   -4.508e+15  2.096e+13 -2.151e+02  < 2e-16 ***
## V14.Cuba                       -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Dominican.Republic         -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Ecuador                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.El.Salvador                -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.England                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.France                     -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Germany                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Greece                     -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Guatemala                  -4.508e+15  2.096e+13 -2.151e+02  < 2e-16 ***
## V14.Haiti                      -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                   -4.508e+15  2.096e+13 -2.151e+02  < 2e-16 ***
## V14.Hong                       -4.508e+15  2.096e+13 -2.151e+02  < 2e-16 ***
## V14.Hungary                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.India                      -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Iran                       -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Ireland                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Italy                      -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Jamaica                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Japan                      -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Laos                       -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Mexico                     -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Nicaragua                  -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Outlying.US.Guam.USVI.etc. -4.508e+15  2.096e+13 -2.151e+02  < 2e-16 ***
## V14.Peru                       -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Philippines                -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Poland                     -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Portugal                   -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Puerto.Rico                -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Scotland                   -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.South                      -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Taiwan                     -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Thailand                   -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Trinadad.Tobago            -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.United.States              -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Vietnam                    -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## V14.Yugoslavia                 -4.470e+12  2.096e+13 -2.130e-01 0.831085    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8821.8  on 8740  degrees of freedom
## Residual deviance: 4879.6  on 8651  degrees of freedom
## AIC: 5059.6
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node 4 fit. 50 is passed positionally
# (presumably the number of features to show; confirm against vip()).
# NOTE(review): the title text ("Lr1Fit", "Assited") does not match the object
# name and looks misspelled; left unchanged here.
vip(Adult_TDA_KDE_5.40.5_n4_LrFit0, 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n4_Lr1Fit TDA-Assited LR")

# Score `adult.one_hot_df4Test` with the Node 4 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the Node 4 predictions against the test-set labels.
ad_tda_kde_5.40.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6568   846
##      >50K     848  1506
##                                          
##                Accuracy : 0.8266         
##                  95% CI : (0.8189, 0.834)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.5258         
##                                          
##  Mcnemar's Test P-Value : 0.9806         
##                                          
##             Sensitivity : 0.8857         
##             Specificity : 0.6403         
##          Pos Pred Value : 0.8859         
##          Neg Pred Value : 0.6398         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6724         
##    Detection Prevalence : 0.7590         
##       Balanced Accuracy : 0.7630         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.40.5_n4_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6568   846
##      >50K     848  1506
##                                          
##                Accuracy : 0.8266         
##                  95% CI : (0.8189, 0.834)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.5258         
##                                          
##  Mcnemar's Test P-Value : 0.9806         
##                                          
##             Sensitivity : 0.8857         
##             Specificity : 0.6403         
##          Pos Pred Value : 0.8859         
##          Neg Pred Value : 0.6398         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6724         
##    Detection Prevalence : 0.7590         
##       Balanced Accuracy : 0.7630         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.40.5_n4_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.265766e-01   5.258061e-01   8.189201e-01   8.340378e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.052021e-59   9.806161e-01
# Keep the overall test accuracy of the Node 4 model (picked by name instead
# of by position), then display the per-class metrics.
ad_tda_kde_5.40.5_n4_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n4_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8856526            0.6403061            0.8858916 
##       Neg Pred Value            Precision               Recall 
##            0.6397621            0.8858916            0.8856526 
##                   F1           Prevalence       Detection Rate 
##            0.8857721            0.7592138            0.6723997 
## Detection Prevalence    Balanced Accuracy 
##            0.7590090            0.7629794
# Precision, recall and F1 for Node 4 (entries 5 to 7 of byClass), by name.
ad_tda_kde_5.40.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-by-fold accuracy difference: `ad_lr_fit_re` minus the Node 4
# TDA-assisted LR fold accuracies (negative = the Node 4 fit is higher).
diff_tda_kde_5.40.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n4_lr_fit_re
diff_tda_kde_5.40.5_lr_n4_3_fold
##      Accuracy
## 1 -0.00888534
## 2 -0.00925071
## 3 -0.01226435
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Runs on the Node 4 fold differences; -0.01 and 0.01 are passed positionally
# (presumably the lower and upper ROPE bounds).
bst_tda_kde_5.40.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n4_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign-test result. The recorded value is
# Inf because probRight is 0 for this node.
bst_tda_kde_5.40.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n4_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same input and bounds as the sign test, applied to the Node 4 fold differences.
bsr_tda_kde_5.40.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n4_3_fold
## $winLeft
## [1] 0.3226667
## 
## $winRope
## [1] 0.6773333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is the second positional argument, presumably the
# correlation between folds. With 3 folds a value of 1/3 may be intended; confirm.
bct_tda_kde_5.40.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n4_3_fold
## $left
## [1] 0.5380583
## 
## $rope
## [1] 0.4600671
## 
## $right
## [1] 0.001874644
# Rope Plot
# Plot the rope() result for the Node 4 fold differences over c(-0.01, 0.01).
plot(
  rope(
    diff_tda_kde_5.40.5_lr_n4_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.40.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold))
#bf_tda_kde_5.40.5_lr.n4_3_fold

#t_test
# One-sample t-test of the fold differences (null hypothesis: mean equals 0).
t.test(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold)
## t = -9.4648, df = 2, p-value = 0.01098
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.014740101 -0.005526832
## sample estimates:
##   mean of x 
## -0.01013347
### Test set diff
# Single accuracy difference: `lr_cf_ov_acc` minus the Node 4 model's accuracy.
diff_tda_kde_5.40.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n4_test
##  Accuracy 
## 0.0267199
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the single Node 4 test-set difference with bounds -0.01 / 0.01.
bst_tda_kde_5.40.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight for the test-set sign test (Inf if probRight is 0).
bst_tda_kde_5.40.5_lr.n4_test_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n4_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single Node 4 test-set difference with bounds -0.01 / 0.01.
bsr_tda_kde_5.40.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1597333
## 
## $winRight
## [1] 0.8402667
# Bayesian Correlated Test
# NOTE(review): the recorded result is NA for left/rope/right. The input is a
# single accuracy difference, presumably too few values for this test; confirm.
bct_tda_kde_5.40.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n4_test)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n4_test)) #bf_tda_pca_5.40.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n4_test))

##Node5

# Node 5: logistic regression (caret method "glm", binomial family) fitted on
# `tda.m_kde_adult_5.40.5.n5.vec`, resampled according to `fitControl`.
# The recorded run emitted glm non-convergence and rank-deficiency warnings.
Adult_TDA_KDE_5.40.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.40.5_n5_LrFit0
## Generalized Linear Model 
## 
## 6628 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 4419, 4418, 4419 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8636074  0.2775096
Adult_TDA_KDE_5.40.5_n5_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8623812 0.2931751    Fold1
## 2 0.8737557 0.4151387    Fold2
## 3 0.8546854 0.1242149    Fold3
# Per-fold accuracies of the Node 5 fit, kept as a one-column data frame
# (column picked by name), followed by the model's coefficient summary.
ad_tda_kde_5.40.5_n5_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_LrFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.40.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (23 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.737e+13  3.923e+13   -0.443 0.657889    
## V1                              8.029e-02  7.893e-03   10.173  < 2e-16 ***
## V2..                            1.737e+13  3.923e+13    0.443 0.657889    
## V2.Federal.gov                  1.737e+13  3.923e+13    0.443 0.657889    
## V2.Local.gov                    1.737e+13  3.923e+13    0.443 0.657889    
## V2.Never.worked                -4.486e+15  3.923e+13 -114.370  < 2e-16 ***
## V2.Private                      1.737e+13  3.923e+13    0.443 0.657889    
## V2.Self.emp.inc                 1.737e+13  3.923e+13    0.443 0.657889    
## V2.Self.emp.not.inc             1.737e+13  3.923e+13    0.443 0.657889    
## V2.State.gov                    1.737e+13  3.923e+13    0.443 0.657889    
## V2.Without.pay                  1.737e+13  3.923e+13    0.443 0.657889    
## V3                              2.658e-06  1.072e-06    2.479 0.013192 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                                NA         NA       NA       NA    
## V4.12th                                NA         NA       NA       NA    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                          NA         NA       NA       NA    
## V4.Assoc.voc                    4.525e-01  2.170e-01    2.085 0.037046 *  
## V4.Bachelors                           NA         NA       NA       NA    
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -3.360e-01  9.127e-02   -3.681 0.000232 ***
## V4.Masters                             NA         NA       NA       NA    
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -5.043e-01  1.080e+00   -0.467 0.640579    
## V6.Married.AF.spouse            4.044e+00  1.723e+00    2.346 0.018957 *  
## V6.Married.civ.spouse           1.844e+00  1.181e+00    1.562 0.118387    
## V6.Married.spouse.absent       -5.915e-01  1.326e+00   -0.446 0.655531    
## V6.Never.married               -9.496e-01  1.081e+00   -0.878 0.379787    
## V6.Separated                   -8.007e-01  1.153e+00   -0.694 0.487494    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.966e-01  2.268e-01    1.308 0.190857    
## V7.Armed.Forces                -2.304e+01  1.579e+05    0.000 0.999884    
## V7.Craft.repair                 2.749e-01  1.769e-01    1.554 0.120233    
## V7.Exec.managerial              8.084e-01  1.961e-01    4.123 3.73e-05 ***
## V7.Farming.fishing             -3.908e-01  3.405e-01   -1.148 0.251029    
## V7.Handlers.cleaners           -3.787e-01  2.768e-01   -1.368 0.171286    
## V7.Machine.op.inspct            5.938e-02  2.111e-01    0.281 0.778506    
## V7.Other.service               -3.837e-01  2.667e-01   -1.438 0.150302    
## V7.Priv.house.serv             -2.345e+01  7.742e+04    0.000 0.999758    
## V7.Prof.specialty               1.171e+00  2.472e-01    4.737 2.17e-06 ***
## V7.Protective.serv              9.087e-01  2.903e-01    3.130 0.001748 ** 
## V7.Sales                        4.306e-01  1.970e-01    2.186 0.028815 *  
## V7.Tech.support                 1.063e+00  2.781e-01    3.824 0.000131 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.094e+00  3.263e-01   -3.354 0.000797 ***
## V8.Not.in.family               -5.420e-01  6.132e-01   -0.884 0.376805    
## V8.Other.relative              -1.391e+00  6.401e-01   -2.173 0.029784 *  
## V8.Own.child                   -1.515e+00  5.780e-01   -2.620 0.008791 ** 
## V8.Unmarried                   -1.360e+00  6.819e-01   -1.994 0.046160 *  
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -1.217e+00  5.881e-01   -2.069 0.038543 *  
## V9.Asian.Pac.Islander           7.260e-02  5.204e-01    0.140 0.889046    
## V9.Black                       -4.920e-01  2.153e-01   -2.285 0.022314 *  
## V9.Other                       -1.844e+00  1.070e+00   -1.723 0.084895 .  
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -6.073e-01  2.794e-01   -2.174 0.029702 *  
## V10.Male                               NA         NA       NA       NA    
## V11                             3.723e-04  2.826e-05   13.173  < 2e-16 ***
## V12                             5.691e-04  9.226e-05    6.169 6.89e-10 ***
## V13                             2.781e-02  4.368e-03    6.367 1.93e-10 ***
## V14..                          -2.797e+00  1.456e+00   -1.921 0.054746 .  
## V14.Cambodia                   -2.696e+01  1.388e+05    0.000 0.999845    
## V14.Canada                     -2.908e+00  1.819e+00   -1.598 0.109997    
## V14.China                      -2.394e+00  1.744e+00   -1.372 0.169975    
## V14.Columbia                   -2.725e+01  8.472e+04    0.000 0.999743    
## V14.Cuba                       -1.738e+00  1.640e+00   -1.059 0.289423    
## V14.Dominican.Republic         -1.902e+01  1.535e+03   -0.012 0.990112    
## V14.Ecuador                    -1.200e+00  1.736e+00   -0.691 0.489309    
## V14.El.Salvador                -2.763e+01  7.608e+04    0.000 0.999710    
## V14.England                    -2.983e+00  1.757e+00   -1.698 0.089458 .  
## V14.France                     -2.497e+01  1.947e+05    0.000 0.999898    
## V14.Germany                    -2.378e+00  1.509e+00   -1.576 0.115093    
## V14.Greece                     -3.194e+00  2.119e+00   -1.507 0.131748    
## V14.Guatemala                  -1.895e+00  1.821e+00   -1.041 0.298014    
## V14.Haiti                      -2.470e+01  1.224e+05    0.000 0.999839    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -2.627e+01  2.368e+05    0.000 0.999912    
## V14.Hong                       -2.789e+01  1.462e+05    0.000 0.999848    
## V14.Hungary                    -2.736e+01  3.817e+05    0.000 0.999943    
## V14.India                      -2.663e+01  9.540e+04    0.000 0.999777    
## V14.Iran                       -1.641e+00  1.985e+00   -0.827 0.408386    
## V14.Ireland                    -2.755e+00  1.799e+00   -1.532 0.125566    
## V14.Italy                      -4.157e+00  1.889e+00   -2.200 0.027780 *  
## V14.Jamaica                    -1.689e+00  1.629e+00   -1.037 0.299929    
## V14.Japan                      -2.745e+01  8.283e+04    0.000 0.999736    
## V14.Laos                       -2.759e+01  1.612e+05    0.000 0.999863    
## V14.Mexico                     -3.184e+00  1.508e+00   -2.112 0.034703 *  
## V14.Nicaragua                  -2.646e+01  1.047e+05    0.000 0.999798    
## V14.Outlying.US.Guam.USVI.etc. -2.611e+01  1.678e+05    0.000 0.999876    
## V14.Peru                       -2.723e+01  1.231e+05    0.000 0.999823    
## V14.Philippines                -1.799e+00  1.626e+00   -1.106 0.268518    
## V14.Poland                     -3.008e+00  1.805e+00   -1.667 0.095575 .  
## V14.Portugal                   -2.232e+00  1.871e+00   -1.193 0.232859    
## V14.Puerto.Rico                -3.876e+00  1.790e+00   -2.166 0.030336 *  
## V14.Scotland                   -2.850e+01  2.340e+05    0.000 0.999903    
## V14.South                      -3.116e+00  1.943e+00   -1.604 0.108675    
## V14.Taiwan                     -3.571e+00  1.969e+00   -1.814 0.069729 .  
## V14.Thailand                   -9.201e-01  3.534e+00   -0.260 0.794571    
## V14.Trinadad.Tobago            -2.830e+01  2.341e+05    0.000 0.999904    
## V14.United.States              -2.581e+00  1.398e+00   -1.846 0.064923 .  
## V14.Vietnam                    -2.804e+00  1.895e+00   -1.480 0.138916    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5623.5  on 6627  degrees of freedom
## Residual deviance: 3522.3  on 6542  degrees of freedom
## AIC: 3694.3
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node 5 fit. 50 is passed positionally
# (presumably the number of features to show; confirm against vip()).
# NOTE(review): the title text ("Lr1Fit", "Assited") does not match the object
# name and looks misspelled; left unchanged here.
vip(Adult_TDA_KDE_5.40.5_n5_LrFit0, 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n5_Lr1Fit TDA-Assited LR")

# Score `adult.one_hot_df4Test` with the Node 5 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the Node 5 predictions against the test-set labels.
ad_tda_kde_5.40.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6465   866
##      >50K     951  1486
##                                           
##                Accuracy : 0.814           
##                  95% CI : (0.8061, 0.8217)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.4974          
##                                           
##  Mcnemar's Test P-Value : 0.04877         
##                                           
##             Sensitivity : 0.8718          
##             Specificity : 0.6318          
##          Pos Pred Value : 0.8819          
##          Neg Pred Value : 0.6098          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6619          
##    Detection Prevalence : 0.7505          
##       Balanced Accuracy : 0.7518          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n5_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6465   866
##      >50K     951  1486
##                                           
##                Accuracy : 0.814           
##                  95% CI : (0.8061, 0.8217)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.4974          
##                                           
##  Mcnemar's Test P-Value : 0.04877         
##                                           
##             Sensitivity : 0.8718          
##             Specificity : 0.6318          
##          Pos Pred Value : 0.8819          
##          Neg Pred Value : 0.6098          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6619          
##    Detection Prevalence : 0.7505          
##       Balanced Accuracy : 0.7518          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n5_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.139844e-01   4.974288e-01   8.061234e-01   8.216578e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.867829e-39   4.876791e-02
# Keep the overall test accuracy of the Node 5 model (picked by name instead
# of by position), then display the per-class metrics.
ad_tda_kde_5.40.5_n5_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n5_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8717638            0.6318027            0.8818715 
##       Neg Pred Value            Precision               Recall 
##            0.6097661            0.8818715            0.8717638 
##                   F1           Prevalence       Detection Rate 
##            0.8767885            0.7592138            0.6618550 
## Detection Prevalence    Balanced Accuracy 
##            0.7505119            0.7517832
# Precision, recall and F1 for Node 5 (entries 5 to 7 of byClass), by name.
ad_tda_kde_5.40.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-by-fold accuracy difference: `ad_lr_fit_re` minus the Node 5
# TDA-assisted LR fold accuracies (negative = the Node 5 fit is higher).
diff_tda_kde_5.40.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n5_lr_fit_re
diff_tda_kde_5.40.5_lr_n5_3_fold
##       Accuracy
## 1 -0.015447764
## 2 -0.017872529
## 3 -0.004218088
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Runs on the Node 5 fold differences; -0.01 and 0.01 are passed positionally
# (presumably the lower and upper ROPE bounds).
bst_tda_kde_5.40.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n5_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign-test result. The recorded value is
# Inf because probRight is 0 for this node.
bst_tda_kde_5.40.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same input and bounds as the sign test, applied to the Node 5 fold differences.
bsr_tda_kde_5.40.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n5_3_fold
## $winLeft
## [1] 0.5117667
## 
## $winRope
## [1] 0.4882333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is the second positional argument, presumably the
# correlation between folds. With 3 folds a value of 1/3 may be intended; confirm.
bct_tda_kde_5.40.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n5_3_fold
## $left
## [1] 0.6717891
## 
## $rope
## [1] 0.3064492
## 
## $right
## [1] 0.02176175
# Rope Plot
# Plot the rope() result for the Node 5 fold differences over c(-0.01, 0.01).
plot(
  rope(
    diff_tda_kde_5.40.5_lr_n5_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.40.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold))
#bf_tda_kde_5.40.5_lr.n5_3_fold

#t_test
# One-sample t-test of the fold differences (null hypothesis: mean equals 0).
t.test(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold)
## t = -2.975, df = 2, p-value = 0.09685
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.030609780  0.005584192
## sample estimates:
##   mean of x 
## -0.01251279
### Test set diff
# Test-set accuracy difference: `lr_cf_ov_acc` minus the TDA/KDE node-5 LR
# accuracy. This is a single number, not a vector of folds.
diff_tda_kde_5.40.5_lr.n5_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n5_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n5_test
##   Accuracy 
## 0.03931204
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference.
# The last two arguments (-0.01, 0.01) are presumably the ROPE bounds.

bst_tda_kde_5.40.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio probLeft / probRight from the sign test.

bst_tda_kde_5.40.5_lr.n5_test_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same single difference.

bsr_tda_kde_5.40.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1582333
## 
## $winRight
## [1] 0.8417667
# Correlated Bayesian t-test. With only one difference supplied, the recorded
# result below is NA for all three components.

bct_tda_kde_5.40.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n5_test)))

#BayesFactor
#bf_tda_kde_5.40.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n5_test)) #bf_tda_pca_5.40.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n5_test))


# Naive Bayes baseline: fit method "nb" on `adult.one_hot_df4Train`, using the
# resampling scheme in `fitControl` and selecting the model by Accuracy.
# NOTE(review): the recorded warnings show the usekernel = FALSE candidate
# failing on every fold (zero-variance columns), so only usekernel = TRUE is
# actually evaluated.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Show the resampling summary and the tuning values that were selected.
adultNbFit
## Naive Bayes 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7665954  0.04554121
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling results of the baseline naive Bayes fit.
adultNbFit$resample
##    Accuracy       Kappa Resample
## 1 0.7746479 0.094018742    Fold1
## 2 0.7654646 0.039289981    Fold2
## 3 0.7596736 0.003314921    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparisons against the TDA node models.
ad_nb_fit_re <- adultNbFit$resample["Accuracy"]

# Structural summary of the fitted object.
summary(adultNbFit)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
#varImp (adultNbFit)



# Score the test data with the fitted baseline naive Bayes model.
predictions <- predict(adultNbFit, newdata = adult.one_hot_df4Test)

# Cross-tabulate predicted labels against the observed test labels.
nb_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nb_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2219
##      >50K       0   133
##                                           
##                Accuracy : 0.7728          
##                  95% CI : (0.7644, 0.7811)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0008047       
##                                           
##                   Kappa : 0.0834          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.05655         
##          Pos Pred Value : 0.76969         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.98638         
##       Balanced Accuracy : 0.52827         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the baseline naive Bayes confusion matrix.
nb_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.772829648    0.083417992    0.764388511    0.781107809    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.000804745    0.000000000
# The first entry above is the accuracy; keep it for the test-set comparisons.
nb_cf_ov_acc <- nb_cf$overall["Accuracy"]

# Per-class metrics.
nb_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           1.00000000           0.05654762           0.76969382 
##       Neg Pred Value            Precision               Recall 
##           1.00000000           0.76969382           1.00000000 
##                   F1           Prevalence       Detection Rate 
##           0.86986101           0.75921376           0.75921376 
## Detection Prevalence    Balanced Accuracy 
##           0.98638411           0.52827381
# Precision, Recall and F1 are entries 5 to 7 of the vector printed above.
nb_cf_pre_rec_f1 <- nb_cf$byClass[c("Precision", "Recall", "F1")]


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Naive Bayes fit on `tda.m_adult_5.40.5.n1.vec`, with the same method,
# resampling control and selection metric as the baseline fit.
Adult_TDA_PC_5.40.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n1.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.HS.grad, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Black, V9.Other, V9.White, V10.Female, V10.Male, V14.., V14.Cambodia, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.HS.grad, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.HS.grad, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Craft.repair, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Show the resampling summary and selected tuning values for the node-1 fit.
Adult_TDA_PC_5.40.5_n1_NbFit0
## Naive Bayes 
## 
## 3373 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 2249, 2249, 2248 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9922918    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling results of the node-1 fit.
Adult_TDA_PC_5.40.5_n1_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9919929     0    Fold1
## 2 0.9928826     0    Fold2
## 3 0.9920000     0    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline.
ad_tda_pc_5.40.5_n1_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n1_NbFit0$resample["Accuracy"]

# Structural summary of the fitted object.
summary(Adult_TDA_PC_5.40.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-1 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted labels against the observed test labels.
ad_tda_pc_5.40.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the confusion matrix already shown right after
# it was created; the output below is identical to the first.
ad_tda_pc_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-1 confusion matrix.
ad_tda_pc_5.40.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the accuracy (first entry above) for the test-set comparison.
ad_tda_pc_5.40.5_n1_nb_cf0_ov_acc <- ad_tda_pc_5.40.5_n1_nb_cf0$overall[1]
# Per-class metrics. Fixed typo: this line used to read `$byClas1`, which is
# not a component of the object, so it printed NULL instead of the metrics.
ad_tda_pc_5.40.5_n1_nb_cf0$byClass
## (not re-run after the fix; the misspelled `$byClas1` printed NULL here)
# Precision, Recall and F1 (entries 5 to 7 of byClass).
ad_tda_pc_5.40.5_n1_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.40.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy difference: baseline NB resamples minus the node-1 NB
# resamples. A negative entry means the node-1 fold scored higher.
diff_tda_pca_5.40.5_nb_n1_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.40.5_n1_nb_fit_re
diff_tda_pca_5.40.5_nb_n1_3_fold
##     Accuracy
## 1 -0.2173450
## 2 -0.2274180
## 3 -0.2323264
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences.
# The last two arguments (-0.01, 0.01) are presumably the ROPE bounds.

bst_tda_pca_5.40.5_nb.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Ratio probLeft / probRight from the sign test (Inf when probRight is 0).

bst_tda_pca_5.40.5_nb.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_nb.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences, same bounds.

bsr_tda_pca_5.40.5_nb.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n1_3_fold
## $winLeft
## [1] 0.9905333
## 
## $winRope
## [1] 0.009466667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; for 3-fold CV the customary value would be 1/3.
# Confirm against the definition of correlatedBayesianTtest.

bct_tda_pca_5.40.5_nb.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n1_3_fold
## $left
## [1] 0.9997216
## 
## $rope
## [1] 4.520956e-05
## 
## $right
## [1] 0.0002331816
# ROPE plot over the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nb_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor (left disabled)
#bf_tda_pca_5.40.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold))
#bf_tda_pca_5.40.5_nb.n1_3_fold

# One-sample t-test of the fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold)
## t = -51.183, df = 2, p-value = 0.0003815
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2446694 -0.2067235
## sample estimates:
##  mean of x 
## -0.2256965
### Test set diff
# Test-set accuracy difference: baseline NB accuracy minus node-1 NB accuracy.
# This is a single number, not a vector of folds.
diff_tda_pca_5.40.5_nb.n1_test <-
  nb_cf_ov_acc - ad_tda_pc_5.40.5_n1_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n1_test
##  Accuracy 
## 0.5320434
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference.
# The last two arguments (-0.01, 0.01) are presumably the ROPE bounds.

bst_tda_pca_5.40.5_nb.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio probLeft / probRight from the sign test.

bst_tda_pca_5.40.5_nb.n1_test_odds.left <- with(
  bst_tda_pca_5.40.5_nb.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nb.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same single difference.

bsr_tda_pca_5.40.5_nb.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1615333
## 
## $winRight
## [1] 0.8384667
# Correlated Bayesian t-test. With only one difference supplied, the recorded
# result below is NA for all three components.

bct_tda_pca_5.40.5_nb.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n1_test)))

#BayesFactor
#bf_tda_pca_5.40.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n1_test)) #bf_tda_pca_5.40.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n1_test))

##Node2

# Naive Bayes fit on `tda.m_adult_5.40.5.n2.vec`, with the same method,
# resampling control and selection metric as the baseline fit.
Adult_TDA_PC_5.40.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Own.child, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Show the resampling summary and selected tuning values for the node-2 fit.
Adult_TDA_PC_5.40.5_n2_NbFit0
## Naive Bayes 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6851, 6850, 6851 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.6108408    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold resampling results of the node-2 fit.
Adult_TDA_PC_5.40.5_n2_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.6108029     0    Fold1
## 2 0.6109165     0    Fold2
## 3 0.6108029     0    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline.
ad_tda_pc_5.40.5_n2_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n2_NbFit0$resample["Accuracy"]

# Structural summary of the fitted object.
summary(Adult_TDA_PC_5.40.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-2 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted labels against the observed test labels.
ad_tda_pc_5.40.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the confusion matrix already shown right after
# it was created; the output below is identical to the first.
ad_tda_pc_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-2 confusion matrix.
ad_tda_pc_5.40.5_n2_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# The first entry above is the accuracy; keep it for the test-set comparison.
ad_tda_pc_5.40.5_n2_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n2_nb_cf0$overall["Accuracy"]

# Per-class metrics. Precision and F1 are NA in the recorded output.
ad_tda_pc_5.40.5_n2_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Precision, Recall and F1 are entries 5 to 7 of the vector printed above.
ad_tda_pc_5.40.5_n2_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy difference: baseline NB resamples minus the node-2 NB
# resamples. A positive entry means the baseline fold scored higher.
diff_tda_pca_5.40.5_nb_n2_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.40.5_n2_nb_fit_re
diff_tda_pca_5.40.5_nb_n2_3_fold
##    Accuracy
## 1 0.1638450
## 2 0.1545481
## 3 0.1488707
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences.
# The last two arguments (-0.01, 0.01) are presumably the ROPE bounds.

bst_tda_pca_5.40.5_nb.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Ratio probLeft / probRight from the sign test.

bst_tda_pca_5.40.5_nb.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_nb.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same differences, same bounds.

bsr_tda_pca_5.40.5_nb.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008766667
## 
## $winRight
## [1] 0.9912333
# Correlated Bayesian t-test.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; for 3-fold CV the customary value would be 1/3.
# Confirm against the definition of correlatedBayesianTtest.

bct_tda_pca_5.40.5_nb.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n2_3_fold
## $left
## [1] 0.0004615979
## 
## $rope
## [1] 0.0001351271
## 
## $right
## [1] 0.9994033
# ROPE plot over the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nb_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (left disabled)
#bf_tda_pca_5.40.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold))
#bf_tda_pca_5.40.5_nb.n2_3_fold

# One-sample t-test of the fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold)
## t = 35.686, df = 2, p-value = 0.0007843
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1369752 0.1745339
## sample estimates:
## mean of x 
## 0.1557546
### Test set diff
# Test-set accuracy difference: baseline NB accuracy minus node-2 NB accuracy.
# This is a single number, not a vector of folds.
diff_tda_pca_5.40.5_nb.n2_test <-
  nb_cf_ov_acc - ad_tda_pc_5.40.5_n2_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n2_test
##  Accuracy 
## 0.5320434
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference.
# The last two arguments (-0.01, 0.01) are presumably the ROPE bounds.

bst_tda_pca_5.40.5_nb.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio probLeft / probRight from the sign test.

bst_tda_pca_5.40.5_nb.n2_test_odds.left <- with(
  bst_tda_pca_5.40.5_nb.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nb.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same single difference.

bsr_tda_pca_5.40.5_nb.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1591
## 
## $winRight
## [1] 0.8409
# Correlated Bayesian t-test. With only one difference supplied, the recorded
# result below is NA for all three components.

bct_tda_pca_5.40.5_nb.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_nb.n2_test,c(-0.01,0.01)))

#BayesFactor (disabled)
#bf_tda_pca_5.40.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n2_test))
#bf_tda_pca_5.40.5_nb.n2_test

#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n2_test))

##Node3

# Fit caret's naive Bayes ("nb") classifier on the Node3 data, resampling via
# the shared `fitControl` and choosing the tuning value by accuracy.
# In the recorded run every usekernel = FALSE fit fails (zero-variance
# predictors), so only the kernel variant yields resampled metrics.
Adult_TDA_PC_5.40.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.El.Salvador, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Portugal
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.40.5_n3_NbFit0
## Naive Bayes 
## 
## 11563 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7709, 7708, 7709 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.7917496    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.40.5_n3_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.7916450     0    Fold1
## 2 0.7916991     0    Fold2
## 3 0.7919045     0    Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position 1, so a change in the layout of
# `$resample` fails loudly instead of silently picking another metric.
ad_tda_pc_5.40.5_n3_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n3_NbFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.40.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the Node3 naive Bayes fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.40.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2332
##      >50K       0    20
##                                           
##                Accuracy : 0.7613          
##                  95% CI : (0.7527, 0.7697)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.3228          
##                                           
##                   Kappa : 0.0129          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.000000        
##             Specificity : 0.008503        
##          Pos Pred Value : 0.760771        
##          Neg Pred Value : 1.000000        
##              Prevalence : 0.759214        
##          Detection Rate : 0.759214        
##    Detection Prevalence : 0.997952        
##       Balanced Accuracy : 0.504252        
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.40.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2332
##      >50K       0    20
##                                           
##                Accuracy : 0.7613          
##                  95% CI : (0.7527, 0.7697)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.3228          
##                                           
##                   Kappa : 0.0129          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.000000        
##             Specificity : 0.008503        
##          Pos Pred Value : 0.760771        
##          Neg Pred Value : 1.000000        
##              Prevalence : 0.759214        
##          Detection Rate : 0.759214        
##    Detection Prevalence : 0.997952        
##       Balanced Accuracy : 0.504252        
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.40.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76126126     0.01285513     0.75267868     0.76968780     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.32281324     0.00000000
# Overall test-set accuracy, selected by name instead of by position 1
ad_tda_pc_5.40.5_n3_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_nb_cf0$overall["Accuracy"]
ad_tda_pc_5.40.5_n3_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          1.000000000          0.008503401          0.760771440 
##       Neg Pred Value            Precision               Recall 
##          1.000000000          0.760771440          1.000000000 
##                   F1           Prevalence       Detection Rate 
##          0.864134234          0.759213759          0.759213759 
## Detection Prevalence    Balanced Accuracy 
##          0.997952498          0.504251701
# Precision, recall and F1 (positions 5:7 of `byClass`), selected by name so
# the extraction does not depend on the ordering of the statistics.
ad_tda_pc_5.40.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff: `ad_nb_fit_re` minus the Node3 per-fold accuracies
# NOTE(review): rows are paired by fold index; confirm both models were
# resampled on comparable folds before reading these as paired differences.
diff_tda_pca_5.40.5_nb_n3_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.40.5_n3_nb_fit_re
diff_tda_pca_5.40.5_nb_n3_3_fold
##      Accuracy
## 1 -0.01699716
## 2 -0.02623450
## 3 -0.03223092
## Bayesian Tests 3-fold diff

# Bayesian sign test on the Node3 per-fold differences
# (the last two arguments are presumably the ROPE limits)
bst_tda_pca_5.40.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right": probLeft / probRight
# (Inf when probRight is 0, as in the recorded run)
bst_tda_pca_5.40.5_nb.n3_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n3_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the Node3 per-fold differences
# (the last two arguments are presumably the ROPE limits)
bsr_tda_pca_5.40.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_nb.n3_3_fold
## $winLeft
## [1] 0.9633667
## 
## $winRope
## [1] 0.03663333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the Node3 per-fold differences
# (0.1 is presumably the fold correlation; -0.01 / 0.01 the ROPE limits)
bct_tda_pca_5.40.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nb.n3_3_fold
## $left
## [1] 0.951211
## 
## $rope
## [1] 0.03852412
## 
## $right
## [1] 0.01026489
# ROPE plot for the Node3 per-fold differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.40.5_nb_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
# bf_tda_pca_5.40.5_nb.n3_3_fold <- ttestBF(
#   x = as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold)
# )
# bf_tda_pca_5.40.5_nb.n3_3_fold

# Frequentist one-sample t-test of the mean fold difference against 0
t.test(x = as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold)
## t = -5.6773, df = 2, p-value = 0.02965
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.044217767 -0.006090613
## sample estimates:
##   mean of x 
## -0.02515419
### Test set diff: `nb_cf_ov_acc` minus the Node3 model's test-set accuracy
diff_tda_pca_5.40.5_nb.n3_test <-
  nb_cf_ov_acc - ad_tda_pc_5.40.5_n3_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n3_test
##   Accuracy 
## 0.01156839
## Bayesian Tests Test set diff

# Bayesian sign test on the single Node3 test-set difference
# (the last two arguments are presumably the ROPE limits)
bst_tda_pca_5.40.5_nb.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n3_test),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Sign-test odds of "left" versus "right": probLeft / probRight
# (0 when probLeft is 0, Inf when probRight is 0)
bst_tda_pca_5.40.5_nb.n3_test_odds.left <-
  bst_tda_pca_5.40.5_nb.n3_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n3_test[["probRight"]]
bst_tda_pca_5.40.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Fixed copy-paste slip: this Node3 step previously re-ran the test on the
# Node2 difference and overwrote bsr_tda_pca_5.40.5_nb.n2_test, so no Node3
# result was ever stored. It now uses the Node3 difference and its own name.
# (The last two arguments are presumably the ROPE limits.)
bsr_tda_pca_5.40.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n3_test),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_nb.n3_test
## NOTE(review): the output recorded below predates the fix and reflects the
## Node2 difference; re-run this step to refresh it for Node3.
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1580333
## 
## $winRight
## [1] 0.8419667
# Bayesian correlated t-test on the single Node3 test-set difference.
# The recorded result is NA for left/rope/right -- presumably because one
# observation leaves no variance to estimate.
bct_tda_pca_5.40.5_nb.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n3_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_nb.n3_test,c(-0.01,0.01)))

#BayesFactor (disabled)
#bf_tda_pca_5.40.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n3_test))
#bf_tda_pca_5.40.5_nb.n3_test

#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n3_test))

##Node4

# Fit caret's naive Bayes ("nb") classifier on the Node4 data, resampling via
# the shared `fitControl` and choosing the tuning value by accuracy.
# In the recorded run every usekernel = FALSE fit fails (zero-variance
# predictors), so only the kernel variant yields resampled metrics.
Adult_TDA_PC_5.40.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Ecuador, V14.El.Salvador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.India, V14.Iran, V14.Italy, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.40.5_n4_NbFit0
## Naive Bayes 
## 
## 14818 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9878, 9879, 9879 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9574167    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.40.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9572874     0    Fold1
## 2 0.9574813     0    Fold2
## 3 0.9574813     0    Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position 1, so a change in the layout of
# `$resample` fails loudly instead of silently picking another metric.
ad_tda_pc_5.40.5_n4_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n4_NbFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.40.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the Node4 naive Bayes fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.40.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.40.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.40.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test-set accuracy, selected by name instead of by position 1
ad_tda_pc_5.40.5_n4_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_nb_cf0$overall["Accuracy"]
ad_tda_pc_5.40.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 (positions 5:7 of `byClass`), selected by name so
# the extraction does not depend on the ordering of the statistics.
ad_tda_pc_5.40.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]


###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff: `ad_nb_fit_re` minus the Node4 per-fold accuracies
# NOTE(review): rows are paired by fold index; confirm both models were
# resampled on comparable folds before reading these as paired differences.
diff_tda_pca_5.40.5_nb_n4_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.40.5_n4_nb_fit_re
diff_tda_pca_5.40.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1826396
## 2 -0.1920167
## 3 -0.1978077
## Bayesian Tests 3-fold diff

# Bayesian sign test on the Node4 per-fold differences
# (the last two arguments are presumably the ROPE limits)
bst_tda_pca_5.40.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right": probLeft / probRight
# (Inf when probRight is 0, as in the recorded run)
bst_tda_pca_5.40.5_nb.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the Node4 per-fold differences
# (the last two arguments are presumably the ROPE limits)
bsr_tda_pca_5.40.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_nb.n4_3_fold
## $winLeft
## [1] 0.9914333
## 
## $winRope
## [1] 0.008566667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the Node4 per-fold differences
# (0.1 is presumably the fold correlation; -0.01 / 0.01 the ROPE limits)
bct_tda_pca_5.40.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nb.n4_3_fold
## $left
## [1] 0.9996023
## 
## $rope
## [1] 7.520336e-05
## 
## $right
## [1] 0.0003225284
# ROPE plot for the Node4 per-fold differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.40.5_nb_n4_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
# bf_tda_pca_5.40.5_nb.n4_3_fold <- ttestBF(
#   x = as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold)
# )
# bf_tda_pca_5.40.5_nb.n4_3_fold

# Frequentist one-sample t-test of the mean fold difference against 0
t.test(x = as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold)
## t = -43.179, df = 2, p-value = 0.0005359
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2098358 -0.1718068
## sample estimates:
##  mean of x 
## -0.1908213
### Test set diff: `nb_cf_ov_acc` minus the Node4 model's test-set accuracy
diff_tda_pca_5.40.5_nb.n4_test <-
  nb_cf_ov_acc - ad_tda_pc_5.40.5_n4_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n4_test
##   Accuracy 
## 0.01361589
## Bayesian Tests Test set diff

# Bayesian sign test on the single Node4 test-set difference
# (the last two arguments are presumably the ROPE limits)
bst_tda_pca_5.40.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n4_test),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Sign-test odds of "left" versus "right": probLeft / probRight
# (0 when probLeft is 0, Inf when probRight is 0)
bst_tda_pca_5.40.5_nb.n4_test_odds.left <-
  bst_tda_pca_5.40.5_nb.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n4_test[["probRight"]]
bst_tda_pca_5.40.5_nb.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single Node4 test-set difference
# (the last two arguments are presumably the ROPE limits)
bsr_tda_pca_5.40.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n4_test),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4648
## 
## $winRight
## [1] 0.5352
# Bayesian correlated t-test on the single Node4 test-set difference.
# The recorded result is NA for left/rope/right -- presumably because one
# observation leaves no variance to estimate.
bct_tda_pca_5.40.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n4_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_nb.n4_test,c(-0.01,0.01)))

#BayesFactor (disabled)
#bf_tda_pca_5.40.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n4_test))
#bf_tda_pca_5.40.5_nb.n4_test

#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n4_test))

##Node5

# Fit caret's naive Bayes ("nb") classifier on the Node5 data, resampling via
# the shared `fitControl` and choosing the tuning value by accuracy.
# In the recorded run every usekernel = FALSE fit fails (zero-variance
# predictors), so only the kernel variant yields resampled metrics.
Adult_TDA_PC_5.40.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.HS.grad, V4.Masters, V4.Preschool, V4.Prof.school, V4.Some.college, V5, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Black, V9.Other, V9.White, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.United.States, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Own.child, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.40.5_n5_NbFit0
## Naive Bayes 
## 
## 12081 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8053, 8054, 8055 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9996689    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.40.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9995035     0    Fold1
## 2 0.9997517     0    Fold2
## 3 0.9997516     0    Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position 1, so a change in the layout of
# `$resample` fails loudly instead of silently picking another metric.
ad_tda_pc_5.40.5_n5_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n5_NbFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.40.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the Node5 naive Bayes fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.40.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.40.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.40.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test-set accuracy, selected by name instead of by position 1
ad_tda_pc_5.40.5_n5_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_nb_cf0$overall["Accuracy"]
ad_tda_pc_5.40.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 (positions 5:7 of `byClass`), selected by name so
# the extraction does not depend on the ordering of the statistics.
ad_tda_pc_5.40.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff: `ad_nb_fit_re` minus the Node5 per-fold accuracies
# NOTE(review): rows are paired by fold index; confirm both models were
# resampled on comparable folds before reading these as paired differences.
diff_tda_pca_5.40.5_nb_n5_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.40.5_n5_nb_fit_re
diff_tda_pca_5.40.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2248556
## 2 -0.2342871
## 3 -0.2400780
## Bayesian Tests 3-fold diff

# Bayesian sign test on the Node5 per-fold differences
# (the last two arguments are presumably the ROPE limits)
bst_tda_pca_5.40.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right": probLeft / probRight
# (Inf when probRight is 0, as in the recorded run)
bst_tda_pca_5.40.5_nb.n5_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nb.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n5_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Fixed copy-paste slip: the Node5 result was previously computed from the
# Node4 per-fold differences (diff_tda_pca_5.40.5_nb_n4_3_fold). It now uses
# the Node5 differences, matching the other Node5 tests in this section.
# (The last two arguments are presumably the ROPE limits.)
bsr_tda_pca_5.40.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_nb.n5_3_fold
## NOTE(review): the output recorded below predates the fix and reflects the
## Node4 differences; re-run this step to refresh it for Node5.
## $winLeft
## [1] 0.9916333
## 
## $winRope
## [1] 0.008366667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the Node5 per-fold differences
# (0.1 is presumably the fold correlation; -0.01 / 0.01 the ROPE limits)
bct_tda_pca_5.40.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nb.n5_3_fold
## $left
## [1] 0.9997366
## 
## $rope
## [1] 4.153807e-05
## 
## $right
## [1] 0.0002218875
# ROPE plot for the Node5 per-fold differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.40.5_nb_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
# bf_tda_pca_5.40.5_nb.n5_3_fold <- ttestBF(
#   x = as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold)
# )
# bf_tda_pca_5.40.5_nb.n5_3_fold

# Frequentist one-sample t-test of the mean fold difference against 0
t.test(x = as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold)
## t = -52.541, df = 2, p-value = 0.000362
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2521603 -0.2139869
## sample estimates:
##  mean of x 
## -0.2330736
### Test set diff
diff_tda_pca_5.40.5_nb.n5_test<-(nb_cf_ov_acc - ad_tda_pc_5.40.5_n5_nb_cf0_ov_acc)
diff_tda_pca_5.40.5_nb.n5_test
##   Accuracy 
## 0.01361589
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.40.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.40.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.40.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.40.5_nb.n5_test_odds.left<-bst_tda_pca_5.40.5_nb.n5_test$probLeft/bst_tda_pca_5.40.5_nb.n5_test$probRight
bst_tda_pca_5.40.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.40.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.40.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4554667
## 
## $winRight
## [1] 0.5445333
# Bayesian Correlated Test

bct_tda_pca_5.40.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n5_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n5_test)) #bf_tda_pca_5.40.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n5_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
# NOTE(review): the object names in this section carry the tag "5.40.5" while the
# header above says 50% overlap -- confirm which overlap setting was actually used.
##Node1

# Fit a naive Bayes classifier (method 'nb') to the node-1 data, resampled
# according to fitControl and tuned on Accuracy.
# In the recorded run every usekernel = FALSE fit fails with zero-variance
# predictors, so only the usekernel = TRUE results are available for selection.
Adult_TDA_KDE_5.40.5_n1_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.40.5.n1.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Vietnam
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Cambodia, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.40.5_n1_NbFit0
## Naive Bayes 
## 
## 11838 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7892, 7892, 7892 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7522386  0.06447648
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.40.5_n1_NbFit0$resample
##    Accuracy      Kappa Resample
## 1 0.7592499 0.10220627    Fold1
## 2 0.7569691 0.09122316    Fold2
## 3 0.7404967 0.00000000    Fold3
# Keep only the per-fold Accuracy column (column 1 of $resample) as a one-column data frame
ad_tda_kde_5.40.5_n1_nb_fit_re <- Adult_TDA_KDE_5.40.5_n1_NbFit0$resample["Accuracy"]

# Print the component overview of the fitted model
summary(Adult_TDA_KDE_5.40.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-1 KDE naive Bayes fit
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print the result
ad_tda_kde_5.40.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2206
##      >50K       0   146
##                                           
##                Accuracy : 0.7742          
##                  95% CI : (0.7657, 0.7824)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.000264        
##                                           
##                   Kappa : 0.0913          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.06207         
##          Pos Pred Value : 0.77073         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.98505         
##       Balanced Accuracy : 0.53104         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n1_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2206
##      >50K       0   146
##                                           
##                Accuracy : 0.7742          
##                  95% CI : (0.7657, 0.7824)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.000264        
##                                           
##                   Kappa : 0.0913          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.06207         
##          Pos Pred Value : 0.77073         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.98505         
##       Balanced Accuracy : 0.53104         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.7741605242   0.0913174168   0.7657362290   0.7824210465   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   0.0002639809   0.0000000000
# Keep the overall test-set Accuracy (first entry of $overall); it feeds the
# test-set difference against nb_cf_ov_acc further down.
ad_tda_kde_5.40.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.40.5_n1_nb_cf0$overall[1]
# Print the per-class statistics. The element is named `byClass`; the earlier
# spelling `byClas1` matched no element, so the expression evaluated to NULL.
ad_tda_kde_5.40.5_n1_nb_cf0$byClass
## (the recorded output here was NULL because of the misspelled element name; re-knit to refresh)
# Entries 5 to 7 of byClass are Precision, Recall and F1
ad_tda_kde_5.40.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.40.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_kde_5.40.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.40.5_n1_nb_fit_re)
diff_tda_kde_5.40.5_nb_n1_3_fold
##      Accuracy
## 1 0.015398014
## 2 0.008495513
## 3 0.019176893
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test (probLeft divided by probRight)

bst_tda_kde_5.40.5_nb.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_nb.n1_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3014667
## 
## $winRight
## [1] 0.6985333
# Bayesian Correlated Test

bct_tda_kde_5.40.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n1_3_fold
## $left
## [1] 0.01063927
## 
## $rope
## [1] 0.1648335
## 
## $right
## [1] 0.8245272
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold))
#bf_tda_kde_5.40.5_nb.n1_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold)
## t = 4.5911, df = 2, p-value = 0.04431
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.00090203 0.02781158
## sample estimates:
##  mean of x 
## 0.01435681
### Test set diff
diff_tda_kde_5.40.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_kde_5.40.5_n1_nb_cf0_ov_acc)
diff_tda_kde_5.40.5_nb.n1_test
##     Accuracy 
## -0.001330876
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb.n1_test),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.40.5_nb.n1_test_odds.left<-bst_tda_kde_5.40.5_nb.n1_test$probLeft/bst_tda_kde_5.40.5_nb.n1_test$probRight
bst_tda_kde_5.40.5_nb.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb.n1_test),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.40.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n1_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n1_test)) #bf_tda_kde_5.40.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n1_test))

##Node2

# Fit a naive Bayes classifier (method 'nb') for node 2, resampled according to
# fitControl and tuned on Accuracy.
# NOTE(review): Node 1 of this KDE section trains on tda.m_kde_adult_5.40.5.n1.vec,
# whereas this call reads tda.m_adult_5.40.5.n2.vec (no "kde" in the name) --
# confirm this is the KDE-filter node-2 subset and not data from another filter.
Adult_TDA_KDE_5.40.5_n2_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.40.5.n2.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Ecuador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Ecuador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.40.5_n2_NbFit0
## Naive Bayes 
## 
## 10276 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6851, 6850, 6851 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.6108408    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.40.5_n2_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.6108029     0    Fold1
## 2 0.6109165     0    Fold2
## 3 0.6108029     0    Fold3
# Keep only the per-fold Accuracy column (column 1 of $resample) as a one-column data frame
ad_tda_kde_5.40.5_n2_nb_fit_re <- Adult_TDA_KDE_5.40.5_n2_NbFit0$resample["Accuracy"]

# Print the component overview of the fitted model
summary(Adult_TDA_KDE_5.40.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-2 KDE naive Bayes fit
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print the result
ad_tda_kde_5.40.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n2_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the overall test-set Accuracy (first entry of $overall), selected by name
ad_tda_kde_5.40.5_n2_nb_cf0_ov_acc <- ad_tda_kde_5.40.5_n2_nb_cf0$overall["Accuracy"]
# Print the per-class statistics
ad_tda_kde_5.40.5_n2_nb_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Store Precision, Recall and F1 of the node-2 model (entries 5 to 7 of byClass), selected by name
ad_tda_kde_5.40.5_n2_nb_cf0_pre_rec_f1 <- ad_tda_kde_5.40.5_n2_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_kde_5.40.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.40.5_n2_nb_fit_re)
diff_tda_kde_5.40.5_nb_n2_3_fold
##    Accuracy
## 1 0.1638450
## 2 0.1545481
## 3 0.1488707
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.40.5_nb.n2_3_fold_odds.left<-bst_tda_kde_5.40.5_nb.n2_3_fold$probLeft/bst_tda_kde_5.40.5_nb.n2_3_fold$probRight
bst_tda_kde_5.40.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008433333
## 
## $winRight
## [1] 0.9915667
# Bayesian Correlated Test

bct_tda_kde_5.40.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n2_3_fold
## $left
## [1] 0.0004615979
## 
## $rope
## [1] 0.0001351271
## 
## $right
## [1] 0.9994033
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold))
#bf_tda_kde_5.40.5_nb.n2_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold)
## t = 35.686, df = 2, p-value = 0.0007843
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1369752 0.1745339
## sample estimates:
## mean of x 
## 0.1557546
### Test set diff
diff_tda_kde_5.40.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_kde_5.40.5_n2_nb_cf0_ov_acc)
diff_tda_kde_5.40.5_nb.n2_test
##  Accuracy 
## 0.5320434
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb.n2_test),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.40.5_nb.n2_test_odds.left<-bst_tda_kde_5.40.5_nb.n2_test$probLeft/bst_tda_kde_5.40.5_nb.n2_test$probRight
bst_tda_kde_5.40.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb.n2_test),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1570333
## 
## $winRight
## [1] 0.8429667
# Bayesian Correlated Test

bct_tda_kde_5.40.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n2_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n2_test)) #bf_tda_kde_5.40.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n2_test))

##Node3

# Fit a naive Bayes classifier (method 'nb') for node 3, resampled according to
# fitControl and tuned on Accuracy.
# NOTE(review): Node 1 of this KDE section trains on tda.m_kde_adult_5.40.5.n1.vec,
# whereas this call reads tda.m_adult_5.40.5.n3.vec (no "kde" in the name) --
# confirm this is the KDE-filter node-3 subset and not data from another filter.
Adult_TDA_KDE_5.40.5_n3_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.40.5.n3.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.40.5_n3_NbFit0
## Naive Bayes 
## 
## 11563 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7708, 7709, 7709 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.7917496    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.40.5_n3_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.7916991     0    Fold1
## 2 0.7919045     0    Fold2
## 3 0.7916450     0    Fold3
# Keep only the per-fold Accuracy column (column 1 of $resample) as a one-column data frame
ad_tda_kde_5.40.5_n3_nb_fit_re <- Adult_TDA_KDE_5.40.5_n3_NbFit0$resample["Accuracy"]

# Print the component overview of the fitted model
summary(Adult_TDA_KDE_5.40.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-3 KDE naive Bayes fit
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print the result
ad_tda_kde_5.40.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2332
##      >50K       0    20
##                                           
##                Accuracy : 0.7613          
##                  95% CI : (0.7527, 0.7697)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.3228          
##                                           
##                   Kappa : 0.0129          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.000000        
##             Specificity : 0.008503        
##          Pos Pred Value : 0.760771        
##          Neg Pred Value : 1.000000        
##              Prevalence : 0.759214        
##          Detection Rate : 0.759214        
##    Detection Prevalence : 0.997952        
##       Balanced Accuracy : 0.504252        
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2332
##      >50K       0    20
##                                           
##                Accuracy : 0.7613          
##                  95% CI : (0.7527, 0.7697)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.3228          
##                                           
##                   Kappa : 0.0129          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.000000        
##             Specificity : 0.008503        
##          Pos Pred Value : 0.760771        
##          Neg Pred Value : 1.000000        
##              Prevalence : 0.759214        
##          Detection Rate : 0.759214        
##    Detection Prevalence : 0.997952        
##       Balanced Accuracy : 0.504252        
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.40.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76126126     0.01285513     0.75267868     0.76968780     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.32281324     0.00000000
# Keep the overall test-set Accuracy (first entry of $overall), selected by name
ad_tda_kde_5.40.5_n3_nb_cf0_ov_acc <- ad_tda_kde_5.40.5_n3_nb_cf0$overall["Accuracy"]
# Print the per-class statistics
ad_tda_kde_5.40.5_n3_nb_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##          1.000000000          0.008503401          0.760771440 
##       Neg Pred Value            Precision               Recall 
##          1.000000000          0.760771440          1.000000000 
##                   F1           Prevalence       Detection Rate 
##          0.864134234          0.759213759          0.759213759 
## Detection Prevalence    Balanced Accuracy 
##          0.997952498          0.504251701
# Store Precision, Recall and F1 of the node-3 model (entries 5 to 7 of byClass), selected by name
ad_tda_kde_5.40.5_n3_nb_cf0_pre_rec_f1 <- ad_tda_kde_5.40.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_kde_5.40.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.40.5_n3_nb_fit_re)
diff_tda_kde_5.40.5_nb_n3_3_fold
##      Accuracy
## 1 -0.01705120
## 2 -0.02643992
## 3 -0.03197145
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.40.5_nb.n3_3_fold_odds.left<-bst_tda_kde_5.40.5_nb.n3_3_fold$probLeft/bst_tda_kde_5.40.5_nb.n3_3_fold$probRight
bst_tda_kde_5.40.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n3_3_fold
## $winLeft
## [1] 0.9642
## 
## $winRope
## [1] 0.0358
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.40.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n3_3_fold
## $left
## [1] 0.9526393
## 
## $rope
## [1] 0.03743396
## 
## $right
## [1] 0.009926781
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold))
#bf_tda_kde_5.40.5_nb.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold)
## t = -5.7762, df = 2, p-value = 0.02869
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.043891443 -0.006416936
## sample estimates:
##   mean of x 
## -0.02515419
### Test set diff
diff_tda_kde_5.40.5_nb.n3_test<-(nb_cf_ov_acc - ad_tda_kde_5.40.5_n3_nb_cf0_ov_acc)
diff_tda_kde_5.40.5_nb.n3_test
##   Accuracy 
## 0.01156839
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.40.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb.n3_test),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.40.5_nb.n3_test_odds.left<-bst_tda_kde_5.40.5_nb.n3_test$probLeft/bst_tda_kde_5.40.5_nb.n3_test$probRight
bst_tda_kde_5.40.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.40.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb.n3_test),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4631
## 
## $winRight
## [1] 0.5369
# Bayesian Correlated Test

bct_tda_kde_5.40.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n3_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n3_test)) #bf_tda_kde_5.40.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n3_test))


##Node4

# Fit a naive Bayes model with train() on the node-4 TDA subset, choosing
# the tuning parameters by resampled accuracy under the shared fitControl.
# NOTE(review): the recorded run warns that the usekernel = FALSE fits
# failed on zero-variance columns, so only usekernel = TRUE was scored.
Adult_TDA_KDE_5.40.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V6.Married.AF.spouse, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.China, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Italy, V14.Jamaica, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Ecuador, V14.El.Salvador, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted node-4 model and its resampling results
Adult_TDA_KDE_5.40.5_n4_NbFit0
## Naive Bayes 
## 
## 14818 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9879, 9879, 9878 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9574167    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa of the selected node-4 model
Adult_TDA_KDE_5.40.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9574813     0    Fold1
## 2 0.9574813     0    Fold2
## 3 0.9572874     0    Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position, so the result does not depend
# on the order of the metric columns in `resample`.
ad_tda_kde_5.40.5_n4_nb_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_NbFit0$resample["Accuracy"]

# Component-level summary of the fitted node-4 model
summary(Adult_TDA_KDE_5.40.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-4 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the test-set labels
ad_tda_kde_5.40.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same node-4 confusion matrix a second time
ad_tda_kde_5.40.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.40.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Test-set accuracy, selected by name instead of position so the value
# stays correct regardless of the ordering of the `overall` vector.
ad_tda_kde_5.40.5_n4_nb_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_nb_cf0$overall["Accuracy"]
# Per-class statistics from the node-4 confusion matrix
ad_tda_kde_5.40.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 selected by name (positions 5:7 in the recorded
# output) so the extraction cannot silently shift if the layout changes.
ad_tda_kde_5.40.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_nb_fit_re (presumably the non-TDA NB
# folds) minus the node-4 TDA-assisted NB folds
diff_tda_kde_5.40.5_nb_n4_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.40.5_n4_nb_fit_re
diff_tda_kde_5.40.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1828334
## 2 -0.1920167
## 3 -0.1976139
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-4 3-fold differences; -0.01 and 0.01 are
# passed positionally as the ROPE bounds
bst_tda_kde_5.40.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; evaluates to Inf when probRight is 0)
bst_tda_kde_5.40.5_nb.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_nb.n4_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-4 3-fold differences, with the
# same positional bounds -0.01 and 0.01
bsr_tda_kde_5.40.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n4_3_fold
## $winLeft
## [1] 0.9909333
## 
## $winRope
## [1] 0.009066667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 3-fold differences. The values
# 0.1, -0.01 and 0.01 are passed positionally (presumably the correlation
# and the ROPE bounds -- confirm against correlatedBayesianTtest).
bct_tda_kde_5.40.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n4_3_fold
## $left
## [1] 0.9996219
## 
## $rope
## [1] 7.148566e-05
## 
## $right
## [1] 0.000306566
# ROPE plot for the node-4 3-fold differences, using the region [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nb_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold))
#bf_tda_kde_5.40.5_nb.n4_3_fold

# One-sample t-test (H0: mean difference is 0) on the node-4 3-fold
# differences. The call is kept verbatim because the printed "data:" label
# echoes the argument expression.
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold)
## t = -44.29, df = 2, p-value = 0.0005094
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2093589 -0.1722837
## sample estimates:
##  mean of x 
## -0.1908213
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc (presumably the non-TDA NB model)
# minus the node-4 TDA-assisted NB accuracy
diff_tda_kde_5.40.5_nb.n4_test <-
  nb_cf_ov_acc - ad_tda_kde_5.40.5_n4_nb_cf0_ov_acc
diff_tda_kde_5.40.5_nb.n4_test
##   Accuracy 
## 0.01361589
## Bayesian Tests Test set diff

# Bayesian sign test on the single node-4 test-set difference; -0.01 and
# 0.01 are passed positionally as the ROPE bounds
bst_tda_kde_5.40.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; evaluates to 0 when probLeft is 0)
bst_tda_kde_5.40.5_nb.n4_test_odds.left <- with(
  bst_tda_kde_5.40.5_nb.n4_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nb.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single node-4 test-set difference, with
# the same positional bounds -0.01 and 0.01
bsr_tda_kde_5.40.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4611
## 
## $winRight
## [1] 0.5389
# Correlated Bayesian t-test on the single node-4 test-set difference.
# NOTE(review): the recorded run returned NA for left/rope/right here --
# presumably one observation gives no spread estimate; confirm.
bct_tda_kde_5.40.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n4_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n4_test))
#bf_tda_kde_5.40.5_nb.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n4_test))

##Node5

# Fit a naive Bayes model with train() on the node-5 TDA subset, choosing
# the tuning parameters by resampled accuracy under the shared fitControl.
# NOTE(review): the recorded run warns that the usekernel = FALSE fits
# failed on zero-variance columns, so only usekernel = TRUE was scored.
Adult_TDA_KDE_5.40.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.HS.grad, V4.Masters, V4.Preschool, V4.Prof.school, V4.Some.college, V5, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Black, V9.Other, V9.White, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Own.child, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.United.States, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted node-5 model and its resampling results
Adult_TDA_KDE_5.40.5_n5_NbFit0
## Naive Bayes 
## 
## 12081 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8053, 8055, 8054 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9996689    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa of the selected node-5 model
Adult_TDA_KDE_5.40.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9995035     0    Fold1
## 2 0.9997516     0    Fold2
## 3 0.9997517     0    Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position, so the result does not depend
# on the order of the metric columns in `resample`.
ad_tda_kde_5.40.5_n5_nb_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_NbFit0$resample["Accuracy"]

# Component-level summary of the fitted node-5 model
summary(Adult_TDA_KDE_5.40.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the test data with the node-5 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the test-set labels
ad_tda_kde_5.40.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same node-5 confusion matrix a second time
ad_tda_kde_5.40.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.40.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Test-set accuracy, selected by name instead of position so the value
# stays correct regardless of the ordering of the `overall` vector.
ad_tda_kde_5.40.5_n5_nb_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_nb_cf0$overall["Accuracy"]
# Per-class statistics from the node-5 confusion matrix
ad_tda_kde_5.40.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, recall and F1 selected by name (positions 5:7 in the recorded
# output) so the extraction cannot silently shift if the layout changes.
ad_tda_kde_5.40.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_nb_fit_re (presumably the non-TDA NB
# folds) minus the node-5 TDA-assisted NB folds
diff_tda_kde_5.40.5_nb_n5_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.40.5_n5_nb_fit_re
diff_tda_kde_5.40.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2248556
## 2 -0.2342870
## 3 -0.2400781
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-5 3-fold differences; -0.01 and 0.01 are
# passed positionally as the ROPE bounds
bst_tda_kde_5.40.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; evaluates to Inf when probRight is 0)
bst_tda_kde_5.40.5_nb.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_nb.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-5 3-fold differences, with the
# same positional bounds -0.01 and 0.01
bsr_tda_kde_5.40.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n5_3_fold
## $winLeft
## [1] 0.9906333
## 
## $winRope
## [1] 0.009366667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-5 3-fold differences. The values
# 0.1, -0.01 and 0.01 are passed positionally (presumably the correlation
# and the ROPE bounds -- confirm against correlatedBayesianTtest).
bct_tda_kde_5.40.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n5_3_fold
## $left
## [1] 0.9997366
## 
## $rope
## [1] 4.153832e-05
## 
## $right
## [1] 0.0002218888
# ROPE plot for the node-5 3-fold differences, using the region [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nb_n5_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold))
#bf_tda_kde_5.40.5_nb.n5_3_fold

# One-sample t-test (H0: mean difference is 0) on the node-5 3-fold
# differences. The call is kept verbatim because the printed "data:" label
# echoes the argument expression.
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold)
## t = -52.541, df = 2, p-value = 0.0003621
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2521603 -0.2139868
## sample estimates:
##  mean of x 
## -0.2330736
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc (presumably the non-TDA NB model)
# minus the node-5 TDA-assisted NB accuracy
diff_tda_kde_5.40.5_nb.n5_test <-
  nb_cf_ov_acc - ad_tda_kde_5.40.5_n5_nb_cf0_ov_acc
diff_tda_kde_5.40.5_nb.n5_test
##   Accuracy 
## 0.01361589
## Bayesian Tests Test set diff

# Bayesian sign test on the single node-5 test-set difference; -0.01 and
# 0.01 are passed positionally as the ROPE bounds
bst_tda_kde_5.40.5_nb.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; evaluates to 0 when probLeft is 0)
bst_tda_kde_5.40.5_nb.n5_test_odds.left <- with(
  bst_tda_kde_5.40.5_nb.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nb.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single node-5 test-set difference, with
# the same positional bounds -0.01 and 0.01
bsr_tda_kde_5.40.5_nb.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4555333
## 
## $winRight
## [1] 0.5444667
# Correlated Bayesian t-test on the single node-5 test-set difference.
# NOTE(review): the recorded run returned NA for left/rope/right here --
# presumably one observation gives no spread estimate; confirm.
bct_tda_kde_5.40.5_nb.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n5_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n5_test))
#bf_tda_kde_5.40.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n5_test))